From fd262367ba6cdeb9cd2382e08f2b34871e54edcb Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 24 May 2019 22:06:45 -0700
Subject: [PATCH] Reoxygenated, updated NEWS.

---
 NAMESPACE            |  2 +-
 inst/NEWS.Rd         |  2 ++
 man/runColDataPCA.Rd | 76 ++++++++++++++++++++++++++++++++++++++++++++
 man/runPCA.Rd        | 49 ++++++++++------------------
 4 files changed, 96 insertions(+), 33 deletions(-)
 create mode 100644 man/runColDataPCA.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 7f083da8..5a9689e4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -42,6 +42,7 @@ export(plotScater)
 export(plotTSNE)
 export(plotUMAP)
 export(readSparseCounts)
+export(runColDataPCA)
 export(runDiffusionMap)
 export(runMDS)
 export(runTSNE)
@@ -80,7 +81,6 @@ importFrom(BiocParallel,bplapply)
 importFrom(BiocParallel,bpmapply)
 importFrom(BiocParallel,bpnworkers)
 importFrom(BiocSingular,ExactParam)
-importFrom(BiocSingular,IrlbaParam)
 importFrom(BiocSingular,runPCA)
 importFrom(DelayedArray,DelayedArray)
 importFrom(DelayedArray,sweep)
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
index f88fd0d2..16ae26a0 100644
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
@@ -6,6 +6,8 @@
     \item Removed deprecated dplyr verbs.
     \item Removed deprecated method= option in runPCA().
     Increased ncomponents= default to 50.
+    Deprecated use_coldata= and related options in favour of runColDataPCA().
+    \item Added runColDataPCA() function for running PCA on colData().
     \item Pass all ... options to biomaRt::useMart() in getBMFeatureAnnos().
   }
 }
diff --git a/man/runColDataPCA.Rd b/man/runColDataPCA.Rd
new file mode 100644
index 00000000..9bb41ac0
--- /dev/null
+++ b/man/runColDataPCA.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/runColDataPCA.R
+\name{runColDataPCA}
+\alias{runColDataPCA}
+\title{Perform PCA on column metadata}
+\usage{
+runColDataPCA(x, ncomponents = 2, scale_features = TRUE,
+  selected_variables = NULL, detect_outliers = FALSE,
+  BSPARAM = ExactParam(), BPPARAM = SerialParam())
+}
+\arguments{
+\item{x}{A \linkS4class{SingleCellExperiment} object.}
+
+\item{ncomponents}{Numeric scalar indicating the number of principal components to obtain
+from the selected column metadata variables.}
+
+\item{scale_features}{Logical scalar, should the column metadata variables be standardised so that each variable has unit variance?
+This will also remove variables with standard deviations below 1e-8.}
+
+\item{selected_variables}{List of strings or a character vector indicating which variables in \code{colData(x)} to use.
+If a list, each entry can take the form described in \code{?"\link{scater-vis-var}"}.}
+
+\item{detect_outliers}{Logical indicating whether outliers should be detected based on PCA coordinates.}
+
+\item{BSPARAM}{A \linkS4class{BiocSingularParam} object specifying which algorithm should be used to perform the PCA.}
+
+\item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying whether the PCA should be parallelized.}
+}
+\value{
+A SingleCellExperiment object containing the first \code{ncomponents} principal coordinates for each cell,
+stored in the \code{"PCA_coldata"} entry of the \code{reducedDims} slot.
+The proportion of variance explained by each PC is stored as a numeric vector in the \code{"percentVar"} attribute.
+}
+\description{
+Perform a principal components analysis (PCA) on cells, 
+based on the column metadata in a SingleCellExperiment object.
+}
+\details{
+This function performs PCA on column-level metadata instead of the gene expression matrix. 
+The \code{selected_variables} defaults to a vector containing:
+\itemize{
+\item \code{"pct_counts_top_100_features"}
+\item \code{"total_features_by_counts"}
+\item \code{"pct_counts_feature_control"}
+\item \code{"total_features_feature_control"}
+\item \code{"log10_total_counts_endogenous"}
+\item \code{"log10_total_counts_feature_control"}
+}
+This can be useful for identifying outlier cells based on QC metrics, 
+especially when combined with \code{detect_outliers=TRUE}.
+If outlier identification is enabled, the output \code{colData} will contain a logical \code{outlier} field.
+This specifies the cells that correspond to the identified outliers.
+}
+\examples{
+## Set up an example SingleCellExperiment
+data("sc_example_counts")
+data("sc_example_cell_info")
+example_sce <- SingleCellExperiment(
+    assays = list(counts = sc_example_counts),
+    colData = sc_example_cell_info
+)
+example_sce <- normalize(example_sce)
+
+example_sce <- calculateQCMetrics(example_sce,
+    feature_controls=list(Spike=1:10))
+example_sce <- runColDataPCA(example_sce)
+reducedDimNames(example_sce)
+head(reducedDim(example_sce))
+
+}
+\seealso{
+\code{\link[scater]{runPCA}}, for the corresponding method operating on expression data.
+}
+\author{
+Aaron Lun, based on code by Davis McCarthy
+}
diff --git a/man/runPCA.Rd b/man/runPCA.Rd
index e53711af..d2c8d6aa 100644
--- a/man/runPCA.Rd
+++ b/man/runPCA.Rd
@@ -3,7 +3,7 @@
 \docType{methods}
 \name{runPCA,SingleCellExperiment-method}
 \alias{runPCA,SingleCellExperiment-method}
-\title{Perform PCA on cell-level data}
+\title{Perform PCA on expression data}
 \usage{
 \S4method{runPCA}{SingleCellExperiment}(x, ncomponents = 50,
   ntop = 500, exprs_values = "logcounts", feature_set = NULL,
@@ -18,7 +18,7 @@
 
 \item{ntop}{Numeric scalar specifying the number of most variable features to use for PCA.}
 
-\item{exprs_values}{Integer scalar or string indicating which assay of \code{object} should be used to obtain the expression values for the calculations.}
+\item{exprs_values}{Integer scalar or string indicating which assay of \code{x} contains the expression values of interest.}
 
 \item{feature_set}{Character vector of row names, a logical vector or a numeric vector of indices indicating a set of features to use for PCA.
 This will override any \code{ntop} argument if specified.}
@@ -26,50 +26,35 @@ This will override any \code{ntop} argument if specified.}
 \item{scale_features}{Logical scalar, should the expression values be standardised so that each feature has unit variance?
 This will also remove features with standard deviations below 1e-8.}
 
-\item{use_coldata}{Logical scalar specifying whether the column data should be used instead of expression values to perform PCA.}
+\item{use_coldata}{Deprecated, use \code{\link{runColDataPCA}} instead.}
 
-\item{selected_variables}{List of strings or a character vector indicating which variables in \code{colData(object)} to use for PCA when \code{use_coldata=TRUE}.
-If a list, each entry can take the form described in \code{?"\link{scater-vis-var}"}.}
+\item{selected_variables}{Deprecated, use \code{\link{runColDataPCA}} instead.}
 
-\item{detect_outliers}{Logical scalar, should outliers be detected based on PCA coordinates generated from column-level metadata?}
+\item{detect_outliers}{Deprecated, use \code{\link{runColDataPCA}} instead.}
 
 \item{BSPARAM}{A \linkS4class{BiocSingularParam} object specifying which algorithm should be used to perform the PCA.}
 
 \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying whether the PCA should be parallelized.}
 }
 \value{
-A SingleCellExperiment object containing the first \code{ncomponent} principal coordinates for each cell.
-If \code{use_coldata=FALSE}, this is stored in the \code{"PCA"} entry of the \code{reducedDims} slot.
-Otherwise, it is stored in the \code{"PCA_coldata"} entry.
-
+A SingleCellExperiment object containing the first \code{ncomponents} principal coordinates for each cell,
+stored in the \code{"PCA"} entry of the \code{reducedDims} slot.
 The proportion of variance explained by each PC is stored as a numeric vector in the \code{"percentVar"} attribute of the reduced dimension matrix.
-Note that this will only be of length equal to \code{ncomponents} when \code{method} is not \code{"prcomp"}.
-This is because approximate PCA methods do not compute singular values for all components.
 }
 \description{
-Perform a principal components analysis (PCA) on cells, based on the data in a SingleCellExperiment object.
+Perform a principal components analysis (PCA) on cells, 
+based on the expression data in a SingleCellExperiment object.
 }
 \details{
-The function \code{\link{prcomp}} is used internally to do the PCA when \code{method="prcomp"}.
-Alternatively, the \pkg{irlba} package can be used, which performs a fast approximation of PCA through the \code{\link[irlba]{prcomp_irlba}} function.
-This is especially useful for large, sparse matrices.
-
-Note that \code{\link[irlba]{prcomp_irlba}} involves a random initialization, after which it converges towards the exact PCs.
+Algorithms like \code{BSPARAM=IrlbaParam()} or \code{RandomParam()} involve
+a random initialization, after which they converge towards the exact PCs.
 This means that the result will change slightly across different runs.
-For full reproducibility, users should call \code{\link{set.seed}} prior to running \code{runPCA} with \code{method="irlba"}.
+For full reproducibility, users should call \code{\link{set.seed}} prior to running \code{runPCA} with such algorithms.
 
-If \code{use_coldata=TRUE}, PCA will be performed on column-level metadata instead of the gene expression matrix. 
-The \code{selected_variables} defaults to a vector containing:
-\itemize{
-\item \code{"pct_counts_top_100_features"}
-\item \code{"total_features_by_counts"}
-\item \code{"pct_counts_feature_control"}
-\item \code{"total_features_feature_control"}
-\item \code{"log10_total_counts_endogenous"}
-\item \code{"log10_total_counts_feature_control"}
-}
-This can be useful for identifying outliers cells based on QC metrics, especially when combined with \code{detect_outliers=TRUE}.
-If outlier identification is enabled, the \code{outlier} field of the output \code{colData} will contain the identified outliers.
+In the returned output, 
+the vector of proportions of variance explained may not have length equal to the total number of available PCs.
+This is because not all PCA methods are guaranteed to compute singular values for all components.
+As such, the proportions of variance explained - while accurate - may not sum to unity.
 }
 \examples{
 ## Set up an example SingleCellExperiment
@@ -86,7 +71,7 @@ reducedDimNames(example_sce)
 head(reducedDim(example_sce))
 }
 \seealso{
-\code{\link{prcomp}}, \code{\link[scater]{plotPCA}}
+\code{\link[BiocSingular]{runPCA}}, \code{\link[scater]{plotPCA}}
 }
 \author{
 Aaron Lun, based on code by Davis McCarthy