From 16625ee04b1d18fe96500bca72bb65c81c045b2c Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Thu, 13 Jun 2019 22:03:50 -0700
Subject: [PATCH] More cache-efficient version of sumCountsAcrossFeatures.

---
 R/sumCountsAcrossFeatures.R | 20 ++++++----
 src/init.cpp                |  2 +-
 src/scater.h                |  2 +-
 src/sum_counts.cpp          | 74 ++++++++++++++++++-------------------
 4 files changed, 49 insertions(+), 49 deletions(-)
diff --git a/R/sumCountsAcrossFeatures.R b/R/sumCountsAcrossFeatures.R
index 2dcc5eb0..b8ac0e02 100644
--- a/R/sumCountsAcrossFeatures.R
+++ b/R/sumCountsAcrossFeatures.R
@@ -8,6 +8,7 @@
 #' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether summation should be parallelized.
 #'
 #' @return A count matrix where counts for all features in the same set are summed together within each cell.
+#' Rows are ordered according to \code{levels(ids)}.
 #'
 #' @details
 #' This function provides a convenient method for aggregating counts across multiple rows for each cell.
@@ -47,21 +48,24 @@ sumCountsAcrossFeatures <- function(object, ids, exprs_values="counts", BPPARAM=
         object <- assay(object, exprs_values, withDimnames=FALSE)
     }
 
-    by_set <- split(seq_along(ids) - 1L, ids)
+    ids <- as.factor(ids)
+    ans <- integer(length(ids))
+    ans <- as.integer(ids) - 1L
+
     assignments <- .assign_jobs_to_workers(ncol(object), BPPARAM)
-    out_list <- bpmapply(start=assignments$start, end=assignments$end, FUN=.sum_across_rows_internal, 
-        MoreArgs=list(by_set=by_set, mat=object), BPPARAM=BPPARAM, SIMPLIFY=FALSE, USE.NAMES=FALSE)
+    out_list <- bpmapply(start=assignments$start, end=assignments$end, 
+        FUN=.sum_across_rows_internal, 
+        MoreArgs=list(by_set=ans, ntotal=nlevels(ids), mat=object), 
+        BPPARAM=BPPARAM, SIMPLIFY=FALSE, USE.NAMES=FALSE)
 
     out <- do.call(cbind, out_list)
     colnames(out) <- colnames(object)
-    rownames(out) <- names(by_set)
+    rownames(out) <- levels(ids)
     out
 }
 
-.sum_across_rows_internal <- function(mat, by_set, start, end) 
+.sum_across_rows_internal <- function(mat, by_set, ntotal, start, end) 
 # Internal function to drag along the namespace.
 {
-    .Call(cxx_sum_row_counts, mat, by_set, start, end)
+    .Call(cxx_sum_row_counts, mat, by_set, ntotal, start, end)
 }
-
-
diff --git a/src/init.cpp b/src/init.cpp
index 8357587b..3005e708 100644
--- a/src/init.cpp
+++ b/src/init.cpp
@@ -13,7 +13,7 @@ static const R_CallMethodDef all_call_entries[] = {
     REGISTER(combined_qc, 7),
     REGISTER(top_cumprop, 2),
 
-    REGISTER(sum_row_counts, 4),
+    REGISTER(sum_row_counts, 5),
     REGISTER(sum_col_counts, 4),
 
     REGISTER(row_above, 4),
diff --git a/src/scater.h b/src/scater.h
index 17ab9baf..aac31a5c 100644
--- a/src/scater.h
+++ b/src/scater.h
@@ -17,7 +17,7 @@ SEXP combined_qc(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 SEXP top_cumprop(SEXP, SEXP);
 
 
-SEXP sum_row_counts(SEXP, SEXP, SEXP, SEXP);
+SEXP sum_row_counts(SEXP, SEXP, SEXP, SEXP, SEXP);
 
 SEXP sum_col_counts(SEXP, SEXP, SEXP, SEXP);
 
diff --git a/src/sum_counts.cpp b/src/sum_counts.cpp
index 9cb9426e..a3e6de5b 100644
--- a/src/sum_counts.cpp
+++ b/src/sum_counts.cpp
@@ -8,25 +8,17 @@
 #include <stdexcept>
 #include <vector>
 
-std::vector<Rcpp::IntegerVector> create_summation(Rcpp::List Sumset) {
-    const size_t nsummations=Sumset.size();
-    std::vector<Rcpp::IntegerVector> summation_set(Sumset.size());
-    for (size_t fx=0; fx<nsummations; ++fx) {
-        summation_set[fx]=Sumset[fx];
-    }
-    return summation_set;
-}
-
 /***************************
  * Summing across features *
  ***************************/
 
 template <class M, class O>
-Rcpp::RObject sum_row_counts_internal(Rcpp::RObject input, const std::vector<Rcpp::IntegerVector>& summable_set, size_t start_index, size_t end_index) {
+Rcpp::RObject sum_row_counts_internal(Rcpp::RObject input, const Rcpp::IntegerVector& summable_set, 
+    size_t nsummations, size_t start_index, size_t end_index) 
+{
     auto mat=beachmat::create_matrix<M>(input);
     const size_t ncells=mat->get_ncol();
 
-    const size_t nsummations=summable_set.size();
     typename M::vector holder_out(nsummations);
     if (end_index > ncells) {
         throw std::runtime_error("end index out of range");
@@ -44,41 +36,38 @@ Rcpp::RObject sum_row_counts_internal(Rcpp::RObject input, const std::vector<Rcp
     for (size_t c=start_index; c<end_index; ++c) {
         col_holder.fill(c);
         auto it=col_holder.get_values();
-        auto ssIt=summable_set.begin();
-
-        for (auto& curval : holder_out) {
-            const auto& curset=(*ssIt);
-            curval=0;
-            for (auto feat : curset) {
-                curval += *(it + feat);
+        for (auto s : summable_set) {
+            if (s!=NA_INTEGER) {
+                holder_out[s]+=*it;
             }
-            ++ssIt;
+            ++it;
         }
 
         out->set_col(c - start_index, holder_out.begin());
+        std::fill(holder_out.begin(), holder_out.end(), 0);
     }
     return out->yield();
 }
 
-SEXP sum_row_counts (SEXP counts, SEXP sumset, SEXP job_start, SEXP job_end) {
+SEXP sum_row_counts (SEXP counts, SEXP sumset, SEXP nsums, SEXP job_start, SEXP job_end) {
     BEGIN_RCPP
 
-    auto summation_set=create_summation(sumset);
-
     // Determining the type and amount of work to do.
     size_t start_index=check_integer_scalar(job_start, "start index");
     size_t end_index=check_integer_scalar(job_end, "end index");
-    if (end_index < start_index) { throw std::runtime_error("start index is less than end index"); }
-    const size_t ncells=end_index - start_index;
+    if (end_index < start_index) { 
+        throw std::runtime_error("start index is less than end index"); 
+    }
+    const size_t nsummations=check_integer_scalar(nsums, "number of sets");
 
     auto mattype=beachmat::find_sexp_type(counts);
     if (mattype==INTSXP) {
         return sum_row_counts_internal<beachmat::integer_matrix,
-           beachmat::integer_output>(counts, summation_set, start_index, end_index);
+           beachmat::integer_output>(counts, sumset, nsummations, start_index, end_index);
 
     } else if (mattype==REALSXP) {
         return sum_row_counts_internal<beachmat::numeric_matrix,
-           beachmat::numeric_output>(counts, summation_set, start_index, end_index);
+           beachmat::numeric_output>(counts, sumset, nsummations, start_index, end_index);
 
     } else {
         throw std::runtime_error("unacceptable matrix type");
@@ -91,7 +80,9 @@ SEXP sum_row_counts (SEXP counts, SEXP sumset, SEXP job_start, SEXP job_end) {
  ************************/
 
 template <class M, class O>
-Rcpp::RObject sum_col_counts_internal(Rcpp::RObject input, const std::vector<Rcpp::IntegerVector>& summable_set, size_t start_index, size_t end_index) {
+Rcpp::RObject sum_col_counts_internal(Rcpp::RObject input, const Rcpp::List& summable_set, 
+    size_t start_index, size_t end_index)
+{
     auto mat=beachmat::create_matrix<M>(input);
     const size_t ncells=mat->get_ncol();
     const size_t ngenes=mat->get_nrow();
@@ -104,10 +95,17 @@ Rcpp::RObject sum_col_counts_internal(Rcpp::RObject input, const std::vector<Rcp
 
     beachmat::output_param OPARAM(mat.get());
     const size_t njobs=end_index - start_index;
-    auto out=beachmat::create_output<O>(njobs, summable_set.size(), OPARAM);
-
-    size_t i_summed=0;
-    for (const auto& curset : summable_set) {
+    const size_t nsummations=summable_set.size();
+    auto out=beachmat::create_output<O>(njobs, nsummations, OPARAM);
+
+    /* We do the rather unintuitive approach of running across each set
+     * and summing columns, rather than running across columns and adding
+     * them to the different sets as we go along. This approach has the
+     * benefit of avoiding the need to hold intermediate sums, which may
+     * be costly to re-extract depending on the backend used in output_param.
+     */
+    for (size_t i_summed=0; i_summed!=summable_set.size(); ++i_summed) {
+        Rcpp::IntegerVector curset=summable_set[i_summed];
         std::fill(holder_out.begin()+start_index, holder_out.begin()+end_index, 0);
 
         for (auto cell : curset) {
@@ -126,10 +124,9 @@ Rcpp::RObject sum_col_counts_internal(Rcpp::RObject input, const std::vector<Rcp
                     holder_out[i] += *val;
                 }
             }
-        }
+        } 
 
         out->set_col(i_summed, holder_out.begin()+start_index);
-        ++i_summed;
     }
     return out->yield();
 }
@@ -137,25 +134,24 @@ Rcpp::RObject sum_col_counts_internal(Rcpp::RObject input, const std::vector<Rcp
 SEXP sum_col_counts (SEXP counts, SEXP sumset, SEXP job_start, SEXP job_end) {
     BEGIN_RCPP
 
-    auto summation_set=create_summation(sumset);
-
     // Determining the type and amount of work to do.
     size_t start_index=check_integer_scalar(job_start, "start index");
     size_t end_index=check_integer_scalar(job_end, "end index");
-    if (end_index < start_index) { throw std::runtime_error("start index is less than end index"); }
+    if (end_index < start_index) { 
+        throw std::runtime_error("start index is less than end index"); 
+    }
 
     auto mattype=beachmat::find_sexp_type(counts);
     if (mattype==INTSXP) {
         return sum_col_counts_internal<beachmat::integer_matrix,
-           beachmat::integer_output>(counts, summation_set, start_index, end_index);
+           beachmat::integer_output>(counts, sumset, start_index, end_index);
 
     } else if (mattype==REALSXP) {
         return sum_col_counts_internal<beachmat::numeric_matrix,
-           beachmat::numeric_output>(counts, summation_set, start_index, end_index);
+           beachmat::numeric_output>(counts, sumset, start_index, end_index);
 
     } else {
         throw std::runtime_error("unacceptable matrix type");
     }
     END_RCPP
 } 
-