adjustments for CRAN submission

tkonopka · Jun 25, 2018 · 88dc14b · 88dc14b
1 parent 66d2e97
commit 88dc14b
Show file tree

Hide file tree

Showing 75 changed files with 197 additions and 38 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,6 +6,7 @@
 .git
 
 ^README
+^LICENSE.md
 
 ^images$
 
@@ -21,4 +22,3 @@
 ^vignettes/.*html$
 ^vignettes/.*png$
 
-
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,10 +1,10 @@
 Package: umap
-Title: Implementation of Uniform Manifold Approximation and Projection in R
-Version: 0.1.0.0
+Title: Uniform Manifold Approximation and Projection
+Version: 0.1.0.3
 Authors@R: person("Tomasz", "Konopka", , "[email protected]", role = c("aut", "cre"))
 Author: Tomasz Konopka [aut, cre]
 Maintainer: Tomasz Konopka <[email protected]>
-Description: Uniform Manifold Approximation and Projection is a technique for dimension reduction.
+Description: Uniform manifold approximation and projection is a technique for dimension reduction. The algorithm was described by McInnes and Healy (2018) in <arXiv:1802.03426>. This package provides an interface for two implementations. One is written from scratch, including components for nearest-neighbor search and for embedding. The second implementation is a wrapper for 'python' package 'umap-learn' (requires separate installation, see vignette for more details).
 Depends:
     R (>= 3.1.2)
 Imports:

diff --git a/LICENSE b/LICENSE
@@ -1,21 +1,2 @@
-The MIT License (MIT)
-
-Copyright (c) 2018 Tomasz Konopka
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+YEAR: 2018
+COPYRIGHT HOLDER: Tomasz Konopka
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Tomasz Konopka
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -3,6 +3,7 @@
 
 #' perform a compound transformation on a vector, including clipping
 #'
+#' @keywords internal
 #' @param x numeric vector
 #' @param inner numeric constant
 #' @param outer numeric constant
@@ -14,6 +15,7 @@ clip4 <- function(x, inner, outer) {
 
 #' compute Euclidean distance between two vectors
 #'
+#' @keywords internal
 #' @param x numeric vector
 #' @param y numeric vector
 #'
@@ -24,6 +26,7 @@ dEuclidean <- function(x, y) {
 
 #' compute Euclidean distances
 #'
+#' @keywords internal
 #' @param m matrix with raw data
 #'
 #' @return dEuclidean norms between first row and all other rows
@@ -33,6 +36,7 @@ mdEuclidean <- function(m) {
 
 #' compute Manhattan distance between two vectors
 #'
+#' @keywords internal
 #' @param x numeric vector
 #' @param y numeric vector
 #'
@@ -43,6 +47,7 @@ dManhattan <- function(x, y) {
 
 #' compute Manhattan distances
 #'
+#' @keywords internal
 #' @param m matrix with raw data
 #'
 #' @return dManhattan norms between origin and targets
@@ -57,6 +62,7 @@ mdManhattan <- function(m) {
 #' Important: this function assumes that data has been centered
 #' i.e. that mean(x) = mean(y) = 0
 #'
+#' @keywords internal
 #' @param x numeric vector
 #' @param y numeric vector
 #'
@@ -67,6 +73,7 @@ dCenteredPearson <- function(x, y) {
 
 #' compute pearson correlation distances 
 #'
+#' @keywords internal
 #' @param m matrix with raw data
 #'
 #' @return dCenteredPearson norms between first row and all other rows
@@ -78,6 +85,7 @@ mdCenteredPearson <- function(m) {
 #'
 #' Note: values output from this function do not satisfy the triangle inequality
 #'
+#' @keywords internal
 #' @param x numeric vector
 #' @param y numeric vector
 #'
@@ -88,6 +96,7 @@ dCosine <- function(x, y) {
 
 #' compute cosine distances
 #'
+#' @keywords internal
 #' @param m matrix with raw data
 #'
 #' @return dCosine norms between first row and all other rows

diff --git a/R/coo.R b/R/coo.R
@@ -5,6 +5,7 @@
 
 ##' Create a coo representation of a square matrix
 ##'
+##' @keywords internal
 ##' @param x square matrix
 ##'
 ##' @return matrix with three columns (from index, to index, value)
@@ -36,7 +37,7 @@ coo = function(x) {
 
 ##' Helper to construct coo objects
 ##'
-##'
+##' @keywords internal
 ##' @param x coo matrix
 ##' @param names character vector
 ##' @param n.elements integer
@@ -59,6 +60,7 @@ make.coo = function(x, names, n.elements) {
 
 ##' Stop execution with a custom message
 ##'
+##' @keywords internal
 ##' @param msg1 character
 ##' @param msg2 character
 stop.coo = function(msg1, msg2="") {
@@ -71,6 +73,7 @@ stop.coo = function(msg1, msg2="") {
 
 ##' Check class for coo
 ##'
+##' @keywords internal
 ##' @param x object of class coo
 ##' @param msg character, message to print alongside error
 check.coo = function(x, msg="") {
@@ -82,6 +85,7 @@ check.coo = function(x, msg="") {
 
 ##' Check that two coo objects are compatible for addition, multiplication
 ##'
+##' @keywords internal
 ##' @param x object of class coo
 ##' @param y object of class coo
 ##' @param msg character, message to print alongside error
@@ -103,6 +107,7 @@ check.compatible.coo = function(x, y, msg="") {
 
 ##' Remove some entries in a coo matrix where values are zero
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##'
 ##' @return matrix based on x, perhaps with some lines in original removed
@@ -119,6 +124,7 @@ reduce.coo = function(x) {
 
 ##' Transpose a coo matrix
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##'
 ##' @return another coo object describing a transposed matrix
@@ -137,6 +143,7 @@ t.coo = function(x) {
 ##'
 ##' The two input objects must be compatible (have equivalent names)
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##' @param y coo object
 ##' @param a numeric, scalar for multiplication
@@ -164,6 +171,7 @@ multiply.coo = function(x, y, a=1) {
 
 ##' Add two coo objects element-wise
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##' @param y coo object
 ##' @param a numeric, scalar for addition
@@ -190,6 +198,7 @@ add.coo = function(x, y, a=1, b=1) {
 
 ##' Prepare a coo object by splitting a coo
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##'
 ##' @return list with two components $to and $from
@@ -207,6 +216,7 @@ multiplicationprep.coo = function(x) {
 
 ##' Matrix multiplication of a coo matrix with a vector
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##' @param v numeric vector
 ##' @param xprep list with values in x split using multiplicationprep
@@ -245,6 +255,7 @@ vectormultiplication.coo = function(x, v, xprep=NULL) {
 
 ##' Convert from coo object into conventional matrix
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##'
 ##' @return matrix

diff --git a/R/coo_spectral.R b/R/coo_spectral.R
@@ -5,6 +5,7 @@
 
 ##' Get a set of k eigenvalues and eigenvectors
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##' @param k integer, number of eigenvalues/eigenvectors
 ##' @param m integer, number of lanczos vectors to use
@@ -40,6 +41,7 @@ spectral.coo = function(x, k, m=2*k+1) {
 
 ##' Construct an identity matrix
 ##'
+##' @keywords internal
 ##' @param n.elements integer, number of elements
 ##' @param names character vector, names associated with the elements
 ##'
@@ -62,6 +64,7 @@ identity.coo = function(n.elements, names=NULL) {
 
 ##' Subset a coo
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##' @param items items (indexes) to keep
 ##'
@@ -109,6 +112,7 @@ subset.coo = function(x, items) {
 ##' This implementation constructs the laplacian  element-by-element.
 ##' Diagonals: 1, Element_ij = -1/sqrt(deg_i deg_j)
 ##'
+##' @keywords internal
 ##' @param x coo object encoding a graph
 ##'
 ##' @return new coo object 
@@ -150,6 +154,7 @@ laplacian.coo = function(x) {
 
 ##' Count the number of connected components in a coo graph
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##'
 ##' @return list with number of connected components and a vector
@@ -207,6 +212,7 @@ concomp.coo = function(x) {
 ##'
 ##' This does not work very well (eigenvectors don't match svd()). Help would be appreciated.
 ##'
+##' @keywords internal
 ##' @param x coo object
 ##' @param k integer, number of vectors to optimize
 ##' @param m integer, number of vectors to use in procedure (set higher than k)

diff --git a/R/knn.R b/R/knn.R
@@ -11,6 +11,7 @@
 ##' By definition, the first nearest neighbor to each point is the point itself.
 ##' Subsequent neighbors are "true" neighbors.
 ##'
+##' @keywords internal
 ##' @param d dist object or matrix with distances
 ##' @param k integer, number of neighbors
 ##'
@@ -65,6 +66,7 @@ knn.from.dist = function(d, k) {
 ##' algorithm is roughly inspired by Dong et al, but there are differences.
 ##' This is a rough implementation and improvements are possible.
 ##'
+##' @keywords internal
 ##' @param d matrix with data
 ##' @param k integer, number of neighbors
 ##' @param metric.function function with signature f(a, b) that returns a metric distance
@@ -207,6 +209,7 @@ knn.from.data = function(d, k, metric.function, subsample.k=0.5) {
 
 ##' Repeat knn.from.data multiple times, pick the best neighbors
 ##'
+##' @keywords internal
 ##' @param d matrix with data
 ##' @param k integer, number of neighbors
 ##' @param metric.function function with signature f(a, b) that returns a metric distance

diff --git a/R/umap.R b/R/umap.R
@@ -42,7 +42,7 @@ python.umap = NULL
 
 ##' Default configuration for umap 
 ##'
-##' A list with parameters customizing a UMAP projection. Each component of the
+##' A list with parameters customizing a UMAP embedding. Each component of the
 ##' list is an effective argument for umap().
 ##'
 ##' n.neighbors: integer; number of nearest neighbors
@@ -75,7 +75,7 @@ python.umap = NULL
 ##' set.op.mix.ratio: numeric in range [0,1]; determines how the knn-graph
 ##' is used to create a fuzzy simplicial graph
 ##'
-##' local.connectivity: numeric; used during construction of fuzzy simplicail set
+##' local.connectivity: numeric; used during construction of fuzzy simplicial set
 ##'
 ##' bandwidth: numeric; used during construction of fuzzy simplicial set
 ##'
@@ -100,6 +100,15 @@ python.umap = NULL
 ##'
 ##' verbose: logical or integer; determines whether to show progress messages
 ##'
+##' @examples
+##' # display all default settings
+##' umap.defaults
+##'
+##' # create a new settings object with n.neighbors set to 5
+##' custom.settings = umap.defaults
+##' custom.settings$n.neighbors = 5
+##' custom.settings
+##' 
 ##' @export
 umap.defaults = list(
   n.neighbors=15,
@@ -132,12 +141,24 @@ class(umap.defaults) = "umap.config"
 ##' @param d matrix, input data
 ##' @param config object of class umap.config
 ##' @param method character, implementation. Available methods are 'naive'
-##' (an implementation written in pure R) and 'python' (requires python package 'umap')
-##' @param ... list of settings; overwrite settings in config
+##' (an implementation written in pure R) and 'python' (requires python package
+##' 'umap-learn')
+##' @param ... list of settings; overwrite default values from config
 ##'
 ##' @return object of class umap, containing at least a component
 ##' with an embedding and a component with configuration settings
 ##'
+##' @examples
+##' # embed iris dataset
+##' # (using default settings, but with reduced number of epochs)
+##' iris.umap = umap(iris[,1:4], n.epochs=20)
+##'
+##' # display object summary
+##' iris.umap
+##'
+##' # display embedding coordinates
+##' head(iris.umap$layout)
+##'
 ##' @export
 umap = function(d, config=umap.defaults, method=c("naive", "python"), ...) {
 

diff --git a/R/umap_checks.R b/R/umap_checks.R
@@ -4,6 +4,7 @@
 
 ##' Validator functions for umap settings
 ##'
+##' @keywords internal
 ##' @param config list with umap arguments
 ##' @param ... other arguments
 ##'
@@ -70,6 +71,7 @@ umap.check.config = function(config=umap.defaults, ...) {
 
 ##' Prep primary input as a data matrix
 ##'
+##' @keywords internal
 ##' @param d matrix or compatible
 ##' @param config list with settings
 ##'
@@ -100,6 +102,7 @@ umap.prep.input = function(d, config) {
 
 ##' stop execution with a custom error message
 ##'
+##' @keywords internal
 ##' @param ... strings for error message
 umap.error = function(...) {
   x = paste(..., collapse=" ")