From fd262367ba6cdeb9cd2382e08f2b34871e54edcb Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 24 May 2019 22:06:45 -0700 Subject: [PATCH] Reoxygenated, updated NEWS. --- NAMESPACE | 2 +- inst/NEWS.Rd | 2 ++ man/runColDataPCA.Rd | 76 ++++++++++++++++++++++++++++++++++++++++++++ man/runPCA.Rd | 49 ++++++++++------------------ 4 files changed, 96 insertions(+), 33 deletions(-) create mode 100644 man/runColDataPCA.Rd diff --git a/NAMESPACE b/NAMESPACE index 7f083da8..5a9689e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(plotScater) export(plotTSNE) export(plotUMAP) export(readSparseCounts) +export(runColDataPCA) export(runDiffusionMap) export(runMDS) export(runTSNE) @@ -80,7 +81,6 @@ importFrom(BiocParallel,bplapply) importFrom(BiocParallel,bpmapply) importFrom(BiocParallel,bpnworkers) importFrom(BiocSingular,ExactParam) -importFrom(BiocSingular,IrlbaParam) importFrom(BiocSingular,runPCA) importFrom(DelayedArray,DelayedArray) importFrom(DelayedArray,sweep) diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index f88fd0d2..16ae26a0 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -6,6 +6,8 @@ \item Removed deprecated dplyr verbs. \item Removed deprecated method= option in runPCA(). Increased ncomponents= default to 50. + Deprecated use_coldata= and related options in favour of runColDataPCA(). + \item Added runColDataPCA() function for running PCA on colData(). \item Pass all ... options to biomaRt::useMart() in getBMFeatureAnnos(). 
} } diff --git a/man/runColDataPCA.Rd b/man/runColDataPCA.Rd new file mode 100644 index 00000000..9bb41ac0 --- /dev/null +++ b/man/runColDataPCA.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/runColDataPCA.R +\name{runColDataPCA} +\alias{runColDataPCA} +\title{Perform PCA on column metadata} +\usage{ +runColDataPCA(x, ncomponents = 2, scale_features = TRUE, + selected_variables = NULL, detect_outliers = FALSE, + BSPARAM = ExactParam(), BPPARAM = SerialParam()) +} +\arguments{ +\item{x}{A \linkS4class{SingleCellExperiment} object.} + +\item{ncomponents}{Numeric scalar indicating the number of principal components to obtain. +This will override any \code{ntop} argument if specified.} + +\item{scale_features}{Logical scalar, should the expression values be standardised so that each feature has unit variance? +This will also remove features with standard deviations below 1e-8.} + +\item{selected_variables}{List of strings or a character vector indicating which variables in \code{colData(x)} to use. +If a list, each entry can take the form described in \code{?"\link{scater-vis-var}"}.} + +\item{detect_outliers}{Logical indicating whether outliers should be detected based on PCA coordinates.} + +\item{BSPARAM}{A \linkS4class{BiocSingularParam} object specifying which algorithm should be used to perform the PCA.} + +\item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying whether the PCA should be parallelized.} +} +\value{ +A SingleCellExperiment object containing the first \code{ncomponents} principal coordinates for each cell, +stored in the \code{"PCA_coldata"} entry of the \code{reducedDims} slot. +The proportion of variance explained by each PC is stored as a numeric vector in the \code{"percentVar"} attribute. +} +\description{ +Perform a principal components analysis (PCA) on cells, +based on the column metadata in a SingleCellExperiment object. 
+} +\details{ +This function performs PCA on column-level metadata instead of the gene expression matrix. +The \code{selected_variables} defaults to a vector containing: +\itemize{ +\item \code{"pct_counts_top_100_features"} +\item \code{"total_features_by_counts"} +\item \code{"pct_counts_feature_control"} +\item \code{"total_features_feature_control"} +\item \code{"log10_total_counts_endogenous"} +\item \code{"log10_total_counts_feature_control"} +} +This can be useful for identifying outlier cells based on QC metrics, +especially when combined with \code{detect_outliers=TRUE}. +If outlier identification is enabled, the output \code{colData} will contain a logical \code{outlier} field. +This specifies the cells that correspond to the identified outliers. +} +\examples{ +## Set up an example SingleCellExperiment +data("sc_example_counts") +data("sc_example_cell_info") +example_sce <- SingleCellExperiment( + assays = list(counts = sc_example_counts), + colData = sc_example_cell_info +) +example_sce <- normalize(example_sce) + +example_sce <- calculateQCMetrics(example_sce, + feature_controls=list(Spike=1:10)) +example_sce <- runColDataPCA(example_sce) +reducedDimNames(example_sce) +head(reducedDim(example_sce)) + +} +\seealso{ +\code{\link[scater]{runPCA}}, for the corresponding method operating on expression data. 
+} +\author{ +Aaron Lun, based on code by Davis McCarthy +} diff --git a/man/runPCA.Rd b/man/runPCA.Rd index e53711af..d2c8d6aa 100644 --- a/man/runPCA.Rd +++ b/man/runPCA.Rd @@ -3,7 +3,7 @@ \docType{methods} \name{runPCA,SingleCellExperiment-method} \alias{runPCA,SingleCellExperiment-method} -\title{Perform PCA on cell-level data} +\title{Perform PCA on expression data} \usage{ \S4method{runPCA}{SingleCellExperiment}(x, ncomponents = 50, ntop = 500, exprs_values = "logcounts", feature_set = NULL, @@ -18,7 +18,7 @@ \item{ntop}{Numeric scalar specifying the number of most variable features to use for PCA.} -\item{exprs_values}{Integer scalar or string indicating which assay of \code{object} should be used to obtain the expression values for the calculations.} +\item{exprs_values}{Integer scalar or string indicating which assay of \code{x} contains the expression values of interest.} \item{feature_set}{Character vector of row names, a logical vector or a numeric vector of indices indicating a set of features to use for PCA. This will override any \code{ntop} argument if specified.} @@ -26,50 +26,35 @@ This will override any \code{ntop} argument if specified.} \item{scale_features}{Logical scalar, should the expression values be standardised so that each feature has unit variance? This will also remove features with standard deviations below 1e-8.} -\item{use_coldata}{Logical scalar specifying whether the column data should be used instead of expression values to perform PCA.} +\item{use_coldata}{Deprecated, use \code{\link{runColDataPCA}} instead.} -\item{selected_variables}{List of strings or a character vector indicating which variables in \code{colData(object)} to use for PCA when \code{use_coldata=TRUE}. 
-If a list, each entry can take the form described in \code{?"\link{scater-vis-var}"}.} +\item{selected_variables}{Deprecated, use \code{\link{runColDataPCA}} instead.} -\item{detect_outliers}{Logical scalar, should outliers be detected based on PCA coordinates generated from column-level metadata?} +\item{detect_outliers}{Deprecated, use \code{\link{runColDataPCA}} instead.} \item{BSPARAM}{A \linkS4class{BiocSingularParam} object specifying which algorithm should be used to perform the PCA.} \item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying whether the PCA should be parallelized.} } \value{ -A SingleCellExperiment object containing the first \code{ncomponent} principal coordinates for each cell. -If \code{use_coldata=FALSE}, this is stored in the \code{"PCA"} entry of the \code{reducedDims} slot. -Otherwise, it is stored in the \code{"PCA_coldata"} entry. - +A SingleCellExperiment object containing the first \code{ncomponents} principal coordinates for each cell, +stored in the \code{"PCA"} entry of the \code{reducedDims} slot. The proportion of variance explained by each PC is stored as a numeric vector in the \code{"percentVar"} attribute of the reduced dimension matrix. -Note that this will only be of length equal to \code{ncomponents} when \code{method} is not \code{"prcomp"}. -This is because approximate PCA methods do not compute singular values for all components. } \description{ -Perform a principal components analysis (PCA) on cells, based on the data in a SingleCellExperiment object. +Perform a principal components analysis (PCA) on cells, +based on the expression data in a SingleCellExperiment object. } \details{ -The function \code{\link{prcomp}} is used internally to do the PCA when \code{method="prcomp"}. -Alternatively, the \pkg{irlba} package can be used, which performs a fast approximation of PCA through the \code{\link[irlba]{prcomp_irlba}} function. -This is especially useful for large, sparse matrices. 
- -Note that \code{\link[irlba]{prcomp_irlba}} involves a random initialization, after which it converges towards the exact PCs. +Algorithms like \code{BSPARAM=IrlbaParam()} or \code{RandomParam()} involve +a random initialization, after which they converge towards the exact PCs. This means that the result will change slightly across different runs. -For full reproducibility, users should call \code{\link{set.seed}} prior to running \code{runPCA} with \code{method="irlba"}. +For full reproducibility, users should call \code{\link{set.seed}} prior to running \code{runPCA} with such algorithms. -If \code{use_coldata=TRUE}, PCA will be performed on column-level metadata instead of the gene expression matrix. -The \code{selected_variables} defaults to a vector containing: -\itemize{ -\item \code{"pct_counts_top_100_features"} -\item \code{"total_features_by_counts"} -\item \code{"pct_counts_feature_control"} -\item \code{"total_features_feature_control"} -\item \code{"log10_total_counts_endogenous"} -\item \code{"log10_total_counts_feature_control"} -} -This can be useful for identifying outliers cells based on QC metrics, especially when combined with \code{detect_outliers=TRUE}. -If outlier identification is enabled, the \code{outlier} field of the output \code{colData} will contain the identified outliers. +In the returned output, +the vector of proportions of variance explained may not have length equal to the total number of available PCs. +This is because not all PCA methods are guaranteed to compute singular values for all components. +As such, the proportions of variance explained - while accurate - may not sum to unity. } \examples{ ## Set up an example SingleCellExperiment @@ -86,7 +71,7 @@ reducedDimNames(example_sce) head(reducedDim(example_sce)) } \seealso{ -\code{\link{prcomp}}, \code{\link[scater]{plotPCA}} +\code{\link{runPCA}}, \code{\link[scater]{plotPCA}} } \author{ Aaron Lun, based on code by Davis McCarthy