diff --git a/.Rbuildignore b/.Rbuildignore index 4f2a4af..bdf666a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,6 +6,7 @@ .git ^README +^LICENSE.md ^images$ @@ -21,4 +22,3 @@ ^vignettes/.*html$ ^vignettes/.*png$ - diff --git a/DESCRIPTION b/DESCRIPTION index 9c53030..d52a280 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: umap -Title: Implementation of Uniform Manifold Approximation and Projection in R -Version: 0.1.0.0 +Title: Uniform Manifold Approximation and Projection +Version: 0.1.0.3 Authors@R: person("Tomasz", "Konopka", , "tokonopka@gmail.com", role = c("aut", "cre")) Author: Tomasz Konopka [aut, cre] Maintainer: Tomasz Konopka <tokonopka@gmail.com> -Description: Uniform Manifold Approximation and Projection is a technique for dimension reduction. +Description: Uniform manifold approximation and projection is a technique for dimension reduction. The algorithm was described by McInnes and Healy (2018) in <arXiv:1802.03426>. This package provides an interface for two implementations. One is written from scratch, including components for nearest-neighbor search and for embedding. The second implementation is a wrapper for 'python' package 'umap-learn' (requires separate installation, see vignette for more details). Depends: R (>= 3.1.2) Imports: diff --git a/LICENSE b/LICENSE index 797f8ab..c1358d4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -The MIT License (MIT) - -Copyright (c) 2018 Tomasz Konopka - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2018 +COPYRIGHT HOLDER: Tomasz Konopka \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..797f8ab --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2018 Tomasz Konopka + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/R/RcppExports.R b/R/RcppExports.R index d3a7290..645c46f 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -3,6 +3,7 @@ #' perform a compound transformation on a vector, including clipping #' +#' @keywords internal #' @param x numeric vector #' @param inner numeric constant #' @param outer numeric constan @@ -14,6 +15,7 @@ clip4 <- function(x, inner, outer) { #' compute Euclidean distance between two vectors #' +#' @keywords internal #' @param x numeric vector #' @param y numeric vector #' @@ -24,6 +26,7 @@ dEuclidean <- function(x, y) { #' compute Euclidean distances #' +#' @keywords internal #' @param m matrix with raw data #' #' @return dEuclidean norms between first row and all other rows @@ -33,6 +36,7 @@ mdEuclidean <- function(m) { #' compute Manhattan distance between two vectors #' +#' @keywords internal #' @param x numeric vector #' @param y numeric vector #' @@ -43,6 +47,7 @@ dManhattan <- function(x, y) { #' compute Manhattan distances #' +#' @keywords internal #' @param m matrix with raw data #' #' @return dManhattan norms between origin and targets @@ -57,6 +62,7 @@ mdManhattan <- function(m) { #' Important: this function assumes that data has been centered #' i.e. 
that mean(x) = mean(y) = 0 #' +#' @keywords internal #' @param x numeric vector #' @param y numeric vector #' @@ -67,6 +73,7 @@ dCenteredPearson <- function(x, y) { #' compute pearson correlation distances #' +#' @keywords internal #' @param m matrix with raw data #' #' @return dCenteredPearson norms between first row and all other rows @@ -78,6 +85,7 @@ mdCenteredPearson <- function(m) { #' #' Note: values output from this function do not satisfy the triangle inequality #' +#' @keywords internal #' @param x numeric vector #' @param y numeric vector #' @@ -88,6 +96,7 @@ dCosine <- function(x, y) { #' compute cosine distances #' +#' @keywords internal #' @param m matrix with raw data #' #' @return dCosine norms between first row and all other rows diff --git a/R/coo.R b/R/coo.R index ac1844f..6cf6695 100644 --- a/R/coo.R +++ b/R/coo.R @@ -5,6 +5,7 @@ ##' Create a coo representation of a square matrix ##' +##' @keywords internal ##' @param x square matrix ##' ##' @return matrix with three columns (from index, to index, value) @@ -36,7 +37,7 @@ coo = function(x) { ##' Helper to construct coo objects ##' -##' +##' @keywords internal ##' @param x coo matrix ##' @param names character vector ##' @param n.elements integer @@ -59,6 +60,7 @@ make.coo = function(x, names, n.elements) { ##' Stop execution with a custom message ##' +##' @keywords internal ##' @param msg1 character ##' @param msg2 character stop.coo = function(msg1, msg2="") { @@ -71,6 +73,7 @@ stop.coo = function(msg1, msg2="") { ##' Check class for coo ##' +##' @keywords internal ##' @param x object of class coo ##' @param msg character, message to print alongside error check.coo = function(x, msg="") { @@ -82,6 +85,7 @@ check.coo = function(x, msg="") { ##' Check that two coo objects are compatible for addition, multiplication ##' +##' @keywords internal ##' @param x object of class coo ##' @param y object of class coos ##' @param msg character, message to print alongside error @@ -103,6 +107,7 @@ 
check.compatible.coo = function(x, y, msg="") { ##' Remove some entires in a coo matrix where values are zero ##' +##' @keywords internal ##' @param x coo object ##' ##' @return matrix based on x, perhaps with some lines in original removed @@ -119,6 +124,7 @@ reduce.coo = function(x) { ##' Transpose a coo matrix ##' +##' @keywords internal ##' @param x coo object ##' ##' @return another coo object describing a transposed matrix @@ -137,6 +143,7 @@ t.coo = function(x) { ##' ##' The two input objects must be compatible (have equivalent names) ##' +##' @keywords internal ##' @param x coo object ##' @param y coo object ##' @param a numeric, scalar for multiplication @@ -164,6 +171,7 @@ multiply.coo = function(x, y, a=1) { ##' Add two coo objects element-wise ##' +##' @keywords internal ##' @param x coo object ##' @param y coo object ##' @param a numeric, scalar for addition @@ -190,6 +198,7 @@ add.coo = function(x, y, a=1, b=1) { ##' Prepare a coo object by splitting a coo ##' +##' @keywords internal ##' @param x coo object ##' ##' @return list with two components $to and $from @@ -207,6 +216,7 @@ multiplicationprep.coo = function(x) { ##' Matrix multiplication of a coo matrix with a vector ##' +##' @keywords internal ##' @param x coo object ##' @param v numeric vector ##' @param xprep list with values in x split using multiplicationprep @@ -245,6 +255,7 @@ vectormultiplication.coo = function(x, v, xprep=NULL) { ##' Convert from coo object into conventional matrix ##' +##' @keywords internal ##' @param x coo object ##' ##' @return matrix diff --git a/R/coo_spectral.R b/R/coo_spectral.R index f97747a..0aea908 100644 --- a/R/coo_spectral.R +++ b/R/coo_spectral.R @@ -5,6 +5,7 @@ ##' Get a set of k eigenvalues and eigenvectors ##' +##' @keywords internal ##' @param x coo object ##' @param k integer, number of eigenvalues/eigenvectors ##' @param m integer, number of lanczos vectors to use @@ -40,6 +41,7 @@ spectral.coo = function(x, k, m=2*k+1) { ##' Construct an identity 
matrix ##' +##' @keywords internal ##' @param n.elements integer, number of elements ##' @param names character vector, names associated with the elements ##' @@ -62,6 +64,7 @@ identity.coo = function(n.elements, names=NULL) { ##' Subset a coo ##' +##' @keywords internal ##' @param x coo object ##' @param items items (indexes) to keep ##' @@ -109,6 +112,7 @@ subset.coo = function(x, items) { ##' This implementation constructs the laplacian element-by-element. ##' Diagonals: 1, Element_ij = -1/sqrt(deg_i deg_j) ##' +##' @keywords internal ##' @param x coo object encoding a graph ##' ##' @return new coo object @@ -150,6 +154,7 @@ laplacian.coo = function(x) { ##' Count the number of connected components in a coo graph ##' +##' @keywords internal ##' @param x coo object ##' ##' @return list with number of connected components and a vector @@ -207,6 +212,7 @@ concomp.coo = function(x) { ##' ##' This does not work very well (eigenvectors don't match svd()). Help would be appreciated. ##' +##' @keywords internal ##' @param x coo object ##' @param k integer, number of vectors to optimize ##' @param m integer, number of vectors to use in procedure (set higher than k) diff --git a/R/knn.R b/R/knn.R index ed7a37b..9c0457c 100644 --- a/R/knn.R +++ b/R/knn.R @@ -11,6 +11,7 @@ ##' By definition, the first nearest neighbor to each point is the point itself. ##' Subsequent neighbors are "true" neighbors. ##' +##' @keywords internal ##' @param d dist object or matrix with distances ##' @param k integer, number of neighbors ##' @@ -65,6 +66,7 @@ knn.from.dist = function(d, k) { ##' algorithm is roughly inspired by Dong et al, but there are differences. ##' This is a rough implementation and improvements are possible. 
##' +##' @keywords internal ##' @param d matrix with data ##' @param k integer, number of neighbors ##' @param metric.function function with signature f(a, b) that returns a metric distance @@ -207,6 +209,7 @@ knn.from.data = function(d, k, metric.function, subsample.k=0.5) { ##' Repeat knn.from.data multiple times, pick the best neighbors ##' +##' @keywords internal ##' @param d matrix with data ##' @param k integer, number of neighbors ##' @param metric.function function with signature f(a, b) that returns a metric distance diff --git a/R/umap.R b/R/umap.R index c09d397..9dcd940 100644 --- a/R/umap.R +++ b/R/umap.R @@ -42,7 +42,7 @@ python.umap = NULL ##' Default configuration for umap ##' -##' A list with parameters customizing a UMAP projection. Each component of the +##' A list with parameters customizing a UMAP embedding. Each component of the ##' list is an effective argument for umap(). ##' ##' n.neighbors: integer; number of nearest neighbors @@ -75,7 +75,7 @@ python.umap = NULL ##' set.op.mix.ratio: numeric in range [0,1]; determines who the knn-graph ##' is used to create a fuzzy simplicial graph ##' -##' local.connectivity: numeric; used during construction of fuzzy simplicail set +##' local.connectivity: numeric; used during construction of fuzzy simplicial set ##' ##' bandwidth: numeric; used during construction of fuzzy simplicial set ##' @@ -100,6 +100,15 @@ python.umap = NULL ##' ##' verbose: logical or integer; determines whether to show progress messages ##' +##' @examples +##' # display all default settings +##' umap.defaults +##' +##' # create a new settings object with n.neighbors set to 5 +##' custom.settings = umap.defaults +##' custom.settings$n.neighbors = 5 +##' custom.settings +##' ##' @export umap.defaults = list( n.neighbors=15, @@ -132,12 +141,24 @@ class(umap.defaults) = "umap.config" ##' @param d matrix, input data ##' @param config object of class umap.config ##' @param method character, implementation. 
Available methods are 'naive' -##' (an implementation written in pure R) and 'python' (requires python package 'umap') -##' @param ... list of settings; overwrite settings in config +##' (an implementation written in pure R) and 'python' (requires python package +##' 'umap-learn') +##' @param ... list of settings; overwrite default values from config ##' ##' @return object of class umap, containing at least a component ##' with an embedding and a component with configuration settings ##' +##' @examples +##' # embed iris dataset +##' # (using default settings, but with reduced number of epochs) +##' iris.umap = umap(iris[,1:4], n.epochs=20) +##' +##' # display object summary +##' iris.umap +##' +##' # display embedding coordinates +##' head(iris.umap$layout) +##' ##' @export umap = function(d, config=umap.defaults, method=c("naive", "python"), ...) { diff --git a/R/umap_checks.R b/R/umap_checks.R index ed17617..0d01197 100644 --- a/R/umap_checks.R +++ b/R/umap_checks.R @@ -4,6 +4,7 @@ ##' Validator functions for umap settings ##' +##' @keywords internal ##' @param config list with umap arguments ##' @param ... other arguments ##' @@ -70,6 +71,7 @@ umap.check.config = function(config=umap.defaults, ...) { ##' Prep primary input as a data matrix ##' +##' @keywords internal ##' @param d matrix or compatible ##' @param config list with settings ##' @@ -100,6 +102,7 @@ umap.prep.input = function(d, config) { ##' stop execution with a custom error message ##' +##' @keywords internal ##' @param ... strings for error message umap.error = function(...) { x = paste(..., collapse=" ") diff --git a/R/umap_naive.R b/R/umap_naive.R index b47561a..1c23835 100644 --- a/R/umap_naive.R +++ b/R/umap_naive.R @@ -16,6 +16,7 @@ ##' This implementation is called naive because it is a rather straightforward ##' translation of the original python code. 
##' +##' @keywords internal ##' @param d data object ##' @param config list with settings ##' @@ -64,6 +65,7 @@ umap.naive = function(d, config) { ##' create an embedding of graph into a low-dimensional space ##' +##' @keywords internal ##' @param g matrix, graph connectivity as coo ##' @param embedding matrix, coordinates for an initial graph embedding ##' @param config list with settings @@ -102,6 +104,7 @@ naive.simplicial.set.embedding = function(g, embedding, config) { ##' modify an existing embedding ##' +##' @keywords internal ##' @param embedding matrix holding an initial embedding ##' @param config list with settings ##' @param eps matrix with connectivity coo graph and epochs per sample; @@ -202,6 +205,7 @@ naive.optimize.embedding = function(embedding, config, eps) { ##' create a simplicial set from a distance object ##' +##' @keywords internal ##' @param knn list with inform about nearest neighbors (output of knn.info) ##' @param config list with settings ##' @@ -263,6 +267,7 @@ naive.fuzzy.simplicial.set = function(knn, config) { ##' compute a "smooth" distance to the kth neighbor and approximate first neighbor ##' +##' @keywords internal ##' @param k.dist matrix with distances to k neighbors ##' @param neighbors numeric, number of neighbors to approximate for ##' @param iterations integers, number of iterations diff --git a/R/umap_print.R b/R/umap_print.R index fb0da12..2e63a65 100644 --- a/R/umap_print.R +++ b/R/umap_print.R @@ -4,6 +4,7 @@ ##' Display a summary of a umap object ##' +##' @keywords internal ##' @param x umap object ##' @param ... other parameters (not used) ##' @@ -30,6 +31,7 @@ print.umap = function(x, ...) { ##' Display contents of a umap configuration ##' +##' @keywords internal ##' @param x object of class umap.config ##' @param ... ignored ##' @@ -63,6 +65,7 @@ print.umap.config = function(x, ...) { ##' Display summary of knn.info ##' +##' @keywords internal ##' @param x object of class umap.knn ##' @param ... 
ignored ##' diff --git a/R/umap_python.R b/R/umap_python.R index ee0488b..94a11b9 100644 --- a/R/umap_python.R +++ b/R/umap_python.R @@ -8,6 +8,7 @@ ##' Create a umap embedding using a python package ##' +##' @keywords internal ##' @param d data object ##' @param config list with settings ##' diff --git a/R/umap_small.R b/R/umap_small.R index 1e113a3..6ba8fa3 100644 --- a/R/umap_small.R +++ b/R/umap_small.R @@ -5,6 +5,7 @@ ##' Create an embedding object compatible with package umap for very small inputs ##' +##' @keywords internal ##' @param d matrix ##' @param config list with settings ##' diff --git a/R/umap_universal.R b/R/umap_universal.R index 85c2087..b6ed281 100644 --- a/R/umap_universal.R +++ b/R/umap_universal.R @@ -9,6 +9,7 @@ ##' This function determines whether to obtain knn information using an exact ##' brute force approach or using an approximate algorithm ##' +##' @keywords internal ##' @param d data matrix ##' @param config list with settings; relevant settings are as follows: ##' input - "data" or "dist" @@ -47,6 +48,7 @@ knn.info = function(d, config) { ##' Make an initial embedding with random coordinates ##' +##' @keywords internal ##' @param d integer, number of diemsions (columns) ##' @param V integer, number of vertices (rows) ##' @param lims numeric vector with lower and upper bounds @@ -61,6 +63,7 @@ make.random.embedding = function(d, V, lims=c(-10, 10)) { ##' Create a spectral embedding for a connectivity graph ##' +##' @keywords internal ##' @param d integer, number of dimensions ##' @param g coo object ##' @@ -121,6 +124,7 @@ make.spectral.embedding = function(d, g) { ##' ##' This either takes a set embedding from config, or sets a random state ##' +##' @keywords internal ##' @param V integer, number of vertices ##' @param config list with settings ##' @param g coo object with graph connectivity @@ -165,6 +169,7 @@ make.initial.embedding = function(V, config, g=NULL) { ##' This performs a brute-force search in parameter space. 
##' The algorithm assumes a narrowing-down search will produce a decent fit. ##' +##' @keywords internal ##' @param spread numeric ##' @param min.dist numeric ##' @param alim numeric vector of length 2, initial search range for parameter a @@ -227,6 +232,7 @@ find.ab.params = function(spread, min.dist, ##' Compute a value to capture how often each item contributes to layout optimization ##' +##' @keywords internal ##' @param w numeric vector or matrix ##' @param epochs integer ##' @@ -250,6 +256,7 @@ make.epochs.per.sample = function(w, epochs) { ##' even better with Rcpp. This implementation exists mainly to ##' facilitate testing the Rcpp version. ##' +##' @keywords internal ##' @param x numeric; single value or a vector ##' @param xmax maximum value for x ##' @@ -264,6 +271,7 @@ clip = function(x, xmax=4) { ##' Adjust a matrix so that each column is centered around zero ##' +##' @keywords internal ##' @param x matrix ##' ##' @return matrix of same shape as x @@ -278,6 +286,7 @@ center.embedding = function(x) { ##' Compute vector norm ##' +##' @keywords internal ##' @param z numeric vector ##' ##' @return numeric, vector norm diff --git a/R/utils.R b/R/utils.R index f2812ec..277f49f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,9 +8,9 @@ ##' Send a message() with a prefix with a data ##' +##' @keywords internal ##' @param x character ##' @param verbose logical -##' message.w.date = function(x, verbose=FALSE) { if (verbose) { message(paste0("[",Sys.time(), "] ", x)) diff --git a/README.md b/README.md index 415f6f6..e31d183 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ R implementation of Uniform Manifold Approximation and Projection [![codecov](https://codecov.io/gh/tkonopka/umap/branch/master/graph/badge.svg)](https://codecov.io/gh/tkonopka/umap) -Uniform manifold appximation and projection (UMAP) is a technique for dimensional reduction. 
The original algorithm was proposed by [McLelland and Heyes](https://arxiv.org/abs/1802.03426) and +Uniform manifold approximation and projection (UMAP) is a technique for dimensional reduction. The original algorithm was proposed by [McInnes and Healy](https://arxiv.org/abs/1802.03426) and implemented in a python package [umap](https://github.com/lmcinnes/umap). This package provides an interface to the UMAP algorithm in R, including a translation of the original algorithm into R with minimal dependencies. diff --git a/man/add.coo.Rd b/man/add.coo.Rd index ed05930..9049d83 100644 --- a/man/add.coo.Rd +++ b/man/add.coo.Rd @@ -21,3 +21,4 @@ new coo object with (a*x) + (b*y) \description{ Add two coo objects element-wise } +\keyword{internal} diff --git a/man/center.embedding.Rd b/man/center.embedding.Rd index d3d5858..dd9a033 100644 --- a/man/center.embedding.Rd +++ b/man/center.embedding.Rd @@ -15,3 +15,4 @@ matrix of same shape as x \description{ Adjust a matrix so that each column is centered around zero } +\keyword{internal} diff --git a/man/check.compatible.coo.Rd b/man/check.compatible.coo.Rd index dee3fe1..b11e1de 100644 --- a/man/check.compatible.coo.Rd +++ b/man/check.compatible.coo.Rd @@ -16,3 +16,4 @@ check.compatible.coo(x, y, msg = "") \description{ Check that two coo objects are compatible for addition, multiplication } +\keyword{internal} diff --git a/man/check.coo.Rd b/man/check.coo.Rd index aaa02f8..85ebb63 100644 --- a/man/check.coo.Rd +++ b/man/check.coo.Rd @@ -14,3 +14,4 @@ check.coo(x, msg = "") \description{ Check class for coo } +\keyword{internal} diff --git a/man/clip.Rd b/man/clip.Rd index eeab4f8..a6d33fb 100644 --- a/man/clip.Rd +++ b/man/clip.Rd @@ -20,3 +20,4 @@ the xmax value inside the function. However, performance is even better with Rcpp. This implementation exists mainly to facilitate testing the Rcpp version. 
} +\keyword{internal} diff --git a/man/clip4.Rd b/man/clip4.Rd index 33ead49..3598c8d 100644 --- a/man/clip4.Rd +++ b/man/clip4.Rd @@ -19,3 +19,4 @@ numeric vector of same length as x, containing outer*clip4(inner*x) \description{ perform a compound transformation on a vector, including clipping } +\keyword{internal} diff --git a/man/concomp.coo.Rd b/man/concomp.coo.Rd index 5e58010..e557dd9 100644 --- a/man/concomp.coo.Rd +++ b/man/concomp.coo.Rd @@ -16,3 +16,4 @@ assigning rows in object to components \description{ Count the number of connected components in a coo graph } +\keyword{internal} diff --git a/man/coo.Rd b/man/coo.Rd index 4854ced..ff4b94d 100644 --- a/man/coo.Rd +++ b/man/coo.Rd @@ -15,3 +15,4 @@ matrix with three columns (from index, to index, value) \description{ Create a coo representation of a square matrix } +\keyword{internal} diff --git a/man/coo2mat.Rd b/man/coo2mat.Rd index 1bdd429..5412417 100644 --- a/man/coo2mat.Rd +++ b/man/coo2mat.Rd @@ -15,3 +15,4 @@ matrix \description{ Convert from coo object into conventional matrix } +\keyword{internal} diff --git a/man/dCenteredPearson.Rd b/man/dCenteredPearson.Rd index 07847d4..97c47ff 100644 --- a/man/dCenteredPearson.Rd +++ b/man/dCenteredPearson.Rd @@ -21,3 +21,4 @@ Pearson distance is (1-r^2) Important: this function assumes that data has been centered i.e. 
that mean(x) = mean(y) = 0 } +\keyword{internal} diff --git a/man/dCosine.Rd b/man/dCosine.Rd index 9214344..91f0193 100644 --- a/man/dCosine.Rd +++ b/man/dCosine.Rd @@ -17,3 +17,4 @@ cosine dissimilarity between x and y \description{ Note: values output from this function do not satisfy the triangle inequality } +\keyword{internal} diff --git a/man/dEuclidean.Rd b/man/dEuclidean.Rd index e2ce80d..ccaf578 100644 --- a/man/dEuclidean.Rd +++ b/man/dEuclidean.Rd @@ -17,3 +17,4 @@ euclidean norm of x-y \description{ compute Euclidean distance between two vectors } +\keyword{internal} diff --git a/man/dManhattan.Rd b/man/dManhattan.Rd index 3229c99..332199a 100644 --- a/man/dManhattan.Rd +++ b/man/dManhattan.Rd @@ -17,3 +17,4 @@ manhattan norm of x-y \description{ compute Manhattan distance between two vectors } +\keyword{internal} diff --git a/man/find.ab.params.Rd b/man/find.ab.params.Rd index 6a81913..8f7e30e 100644 --- a/man/find.ab.params.Rd +++ b/man/find.ab.params.Rd @@ -26,3 +26,4 @@ for the given spread and min.dist. This performs a brute-force search in parameter space. The algorithm assumes a narrowing-down search will produce a decent fit. } +\keyword{internal} diff --git a/man/identity.coo.Rd b/man/identity.coo.Rd index 6efdea0..a7253ca 100644 --- a/man/identity.coo.Rd +++ b/man/identity.coo.Rd @@ -17,3 +17,4 @@ new coo object \description{ Construct an identity matrix } +\keyword{internal} diff --git a/man/knn.from.data.Rd b/man/knn.from.data.Rd index 1f8436e..51e6326 100644 --- a/man/knn.from.data.Rd +++ b/man/knn.from.data.Rd @@ -28,3 +28,4 @@ results that are nondeterministic and only approximately correct. The algorithm is roughly inspired by Dong et al, but there are differences. This is a rough implementation and improvements are possible. 
} +\keyword{internal} diff --git a/man/knn.from.data.reps.Rd b/man/knn.from.data.reps.Rd index 1056ce0..f7fd15f 100644 --- a/man/knn.from.data.reps.Rd +++ b/man/knn.from.data.reps.Rd @@ -23,3 +23,4 @@ list of same format as knn.from.data \description{ Repeat knn.from.data multiple times, pick the best neighbors } +\keyword{internal} diff --git a/man/knn.from.dist.Rd b/man/knn.from.dist.Rd index ebfb854..fee1cb3 100644 --- a/man/knn.from.dist.Rd +++ b/man/knn.from.dist.Rd @@ -25,3 +25,4 @@ that are nearest to each data point. The result is deterministic and exact. By definition, the first nearest neighbor to each point is the point itself. Subsequent neighbors are "true" neighbors. } +\keyword{internal} diff --git a/man/knn.info.Rd b/man/knn.info.Rd index 450783e..9ba667a 100644 --- a/man/knn.info.Rd +++ b/man/knn.info.Rd @@ -21,3 +21,4 @@ list with at least two components, indexes and distances This function determines whether to obtain knn information using an exact brute force approach or using an approximate algorithm } +\keyword{internal} diff --git a/man/lanczos.coo.Rd b/man/lanczos.coo.Rd index c71f65f..6ca5c2c 100644 --- a/man/lanczos.coo.Rd +++ b/man/lanczos.coo.Rd @@ -24,3 +24,4 @@ with restart implemented ad-hoc \details{ This does not work very well (eigenvectors don't match svd()). Help would be appreciated. } +\keyword{internal} diff --git a/man/laplacian.coo.Rd b/man/laplacian.coo.Rd index f92d997..043738a 100644 --- a/man/laplacian.coo.Rd +++ b/man/laplacian.coo.Rd @@ -16,3 +16,4 @@ new coo object This implementation constructs the laplacian element-by-element. 
Diagonals: 1, Element_ij = -1/sqrt(deg_i deg_j) } +\keyword{internal} diff --git a/man/make.coo.Rd b/man/make.coo.Rd index 5a71de3..c6f75e4 100644 --- a/man/make.coo.Rd +++ b/man/make.coo.Rd @@ -19,3 +19,4 @@ coo object \description{ Helper to construct coo objects } +\keyword{internal} diff --git a/man/make.epochs.per.sample.Rd b/man/make.epochs.per.sample.Rd index f9e32ed..d43ecaa 100644 --- a/man/make.epochs.per.sample.Rd +++ b/man/make.epochs.per.sample.Rd @@ -17,3 +17,4 @@ numeric vector of same length as w \description{ Compute a value to capture how often each item contributes to layout optimization } +\keyword{internal} diff --git a/man/make.initial.embedding.Rd b/man/make.initial.embedding.Rd index acae4af..bef4ad5 100644 --- a/man/make.initial.embedding.Rd +++ b/man/make.initial.embedding.Rd @@ -19,3 +19,4 @@ matrix with an embedding \description{ This either takes a set embedding from config, or sets a random state } +\keyword{internal} diff --git a/man/make.random.embedding.Rd b/man/make.random.embedding.Rd index d139186..8862a23 100644 --- a/man/make.random.embedding.Rd +++ b/man/make.random.embedding.Rd @@ -19,3 +19,4 @@ matrix (V,d) with random numbers \description{ Make an initial embedding with random coordinates } +\keyword{internal} diff --git a/man/make.spectral.embedding.Rd b/man/make.spectral.embedding.Rd index 122c783..160d8c0 100644 --- a/man/make.spectral.embedding.Rd +++ b/man/make.spectral.embedding.Rd @@ -17,3 +17,4 @@ embedding matrix. 
Might return NULL if spectral embedding fails \description{ Create a spectral embedding for a connectivity graph } +\keyword{internal} diff --git a/man/mdCenteredPearson.Rd b/man/mdCenteredPearson.Rd index b7a55f5..efc3f48 100644 --- a/man/mdCenteredPearson.Rd +++ b/man/mdCenteredPearson.Rd @@ -15,3 +15,4 @@ dCenteredPearson norms between first row and all other rows \description{ compute pearson correlation distances } +\keyword{internal} diff --git a/man/mdCosine.Rd b/man/mdCosine.Rd index 2e9a71a..e0c5a9c 100644 --- a/man/mdCosine.Rd +++ b/man/mdCosine.Rd @@ -15,3 +15,4 @@ dCosine norms between first row and all other rows \description{ compute cosine distances } +\keyword{internal} diff --git a/man/mdEuclidean.Rd b/man/mdEuclidean.Rd index 719b19f..c2253ac 100644 --- a/man/mdEuclidean.Rd +++ b/man/mdEuclidean.Rd @@ -15,3 +15,4 @@ dEuclidean norms between first row and all other rows \description{ compute Euclidean distances } +\keyword{internal} diff --git a/man/mdManhattan.Rd b/man/mdManhattan.Rd index 2253e09..75707f0 100644 --- a/man/mdManhattan.Rd +++ b/man/mdManhattan.Rd @@ -15,3 +15,4 @@ dManhattan norms between origin and targets \description{ compute Manhattan distances } +\keyword{internal} diff --git a/man/message.w.date.Rd b/man/message.w.date.Rd index 7ae8331..1189928 100644 --- a/man/message.w.date.Rd +++ b/man/message.w.date.Rd @@ -14,3 +14,4 @@ message.w.date(x, verbose = FALSE) \description{ Send a message() with a prefix with a data } +\keyword{internal} diff --git a/man/multiplicationprep.coo.Rd b/man/multiplicationprep.coo.Rd index e1cc9cb..462a1cc 100644 --- a/man/multiplicationprep.coo.Rd +++ b/man/multiplicationprep.coo.Rd @@ -15,3 +15,4 @@ list with two components $to and $from \description{ Prepare a coo object by splitting a coo } +\keyword{internal} diff --git a/man/multiply.coo.Rd b/man/multiply.coo.Rd index fd121ad..1134bbf 100644 --- a/man/multiply.coo.Rd +++ b/man/multiply.coo.Rd @@ -19,3 +19,4 @@ new coo object with produce a*x*y 
\description{ The two input objects must be compatible (have equivalent names) } +\keyword{internal} diff --git a/man/naive.fuzzy.simplicial.set.Rd b/man/naive.fuzzy.simplicial.set.Rd index 5598292..9cf5504 100644 --- a/man/naive.fuzzy.simplicial.set.Rd +++ b/man/naive.fuzzy.simplicial.set.Rd @@ -17,3 +17,4 @@ matrix \description{ create a simplicial set from a distance object } +\keyword{internal} diff --git a/man/naive.optimize.embedding.Rd b/man/naive.optimize.embedding.Rd index 24ef5fd..6419e2b 100644 --- a/man/naive.optimize.embedding.Rd +++ b/man/naive.optimize.embedding.Rd @@ -20,3 +20,4 @@ matrix of same dimension as initial embedding \description{ modify an existing embedding } +\keyword{internal} diff --git a/man/naive.simplicial.set.embedding.Rd b/man/naive.simplicial.set.embedding.Rd index e1be697..6ff1aad 100644 --- a/man/naive.simplicial.set.embedding.Rd +++ b/man/naive.simplicial.set.embedding.Rd @@ -20,3 +20,4 @@ nrows is from g, ncols determined from config \description{ create an embedding of graph into a low-dimensional space } +\keyword{internal} diff --git a/man/print.umap.Rd b/man/print.umap.Rd index 2be0808..f86309e 100644 --- a/man/print.umap.Rd +++ b/man/print.umap.Rd @@ -14,3 +14,4 @@ \description{ Display a summary of a umap object } +\keyword{internal} diff --git a/man/print.umap.config.Rd b/man/print.umap.config.Rd index 8f5773d..2e3d85d 100644 --- a/man/print.umap.config.Rd +++ b/man/print.umap.config.Rd @@ -14,3 +14,4 @@ \description{ Display contents of a umap configuration } +\keyword{internal} diff --git a/man/print.umap.knn.Rd b/man/print.umap.knn.Rd index a62247b..e612b2e 100644 --- a/man/print.umap.knn.Rd +++ b/man/print.umap.knn.Rd @@ -14,3 +14,4 @@ \description{ Display summary of knn.info } +\keyword{internal} diff --git a/man/reduce.coo.Rd b/man/reduce.coo.Rd index 32d26dc..b3ba1f8 100644 --- a/man/reduce.coo.Rd +++ b/man/reduce.coo.Rd @@ -15,3 +15,4 @@ matrix based on x, perhaps with some lines in original removed 
\description{ Remove some entires in a coo matrix where values are zero } +\keyword{internal} diff --git a/man/smooth.knn.dist.Rd b/man/smooth.knn.dist.Rd index 8c40aa6..790c074 100644 --- a/man/smooth.knn.dist.Rd +++ b/man/smooth.knn.dist.Rd @@ -29,3 +29,4 @@ and distance to the first nearest neighbor \description{ compute a "smooth" distance to the kth neighbor and approximate first neighbor } +\keyword{internal} diff --git a/man/spectral.coo.Rd b/man/spectral.coo.Rd index 8635fb6..3b42e6b 100644 --- a/man/spectral.coo.Rd +++ b/man/spectral.coo.Rd @@ -20,3 +20,4 @@ list with two components \description{ Get a set of k eigenvalues and eigenvectors } +\keyword{internal} diff --git a/man/stop.coo.Rd b/man/stop.coo.Rd index 75ba6e4..15693d0 100644 --- a/man/stop.coo.Rd +++ b/man/stop.coo.Rd @@ -14,3 +14,4 @@ stop.coo(msg1, msg2 = "") \description{ Stop execution with a custom message } +\keyword{internal} diff --git a/man/subset.coo.Rd b/man/subset.coo.Rd index fb09bbd..3a5d0bb 100644 --- a/man/subset.coo.Rd +++ b/man/subset.coo.Rd @@ -17,3 +17,4 @@ new smaller coo object \description{ Subset a coo } +\keyword{internal} diff --git a/man/t.coo.Rd b/man/t.coo.Rd index 6bf7b0d..38c4957 100644 --- a/man/t.coo.Rd +++ b/man/t.coo.Rd @@ -15,3 +15,4 @@ another coo object describing a transposed matrix \description{ Transpose a coo matrix } +\keyword{internal} diff --git a/man/umap.Rd b/man/umap.Rd index fb293d8..a510c07 100644 --- a/man/umap.Rd +++ b/man/umap.Rd @@ -12,9 +12,10 @@ umap(d, config = umap.defaults, method = c("naive", "python"), ...) \item{config}{object of class umap.config} \item{method}{character, implementation. 
Available methods are 'naive' -(an implementation written in pure R) and 'python' (requires python package 'umap')} +(an implementation written in pure R) and 'python' (requires python package +'umap-learn')} -\item{...}{list of settings; overwrite settings in config} +\item{...}{list of settings; overwrite default values from config} } \value{ object of class umap, containing at least a component @@ -23,3 +24,15 @@ with an embedding and a component with configuration settings \description{ Computes a manifold approximation and projection } +\examples{ +# embed iris dataset +# (using default settings, but with reduced number of epochs) +iris.umap = umap(iris[,1:4], n.epochs=20) + +# display object summary +iris.umap + +# display embedding coordinates +head(iris.umap$layout) + +} diff --git a/man/umap.check.config.Rd b/man/umap.check.config.Rd index 3c71c3f..1715854 100644 --- a/man/umap.check.config.Rd +++ b/man/umap.check.config.Rd @@ -17,3 +17,4 @@ config object, may contain some different components from config in input \description{ Validator functions for umap settings } +\keyword{internal} diff --git a/man/umap.defaults.Rd b/man/umap.defaults.Rd index 6ef898a..79c6f27 100644 --- a/man/umap.defaults.Rd +++ b/man/umap.defaults.Rd @@ -9,7 +9,7 @@ umap.defaults } \description{ -A list with parameters customizing a UMAP projection. Each component of the +A list with parameters customizing a UMAP embedding. Each component of the list is an effective argument for umap(). 
} \details{ @@ -43,7 +43,7 @@ min.dist: numeric; determines how close points appear in the final layout set.op.mix.ratio: numeric in range [0,1]; determines who the knn-graph is used to create a fuzzy simplicial graph -local.connectivity: numeric; used during construction of fuzzy simplicail set +local.connectivity: numeric; used during construction of fuzzy simplicial set bandwidth: numeric; used during construction of fuzzy simplicial set @@ -66,5 +66,15 @@ seed: integer; seed for random number generation knn.repeat: number of times to restart knn search verbose: logical or integer; determines whether to show progress messages +} +\examples{ +# display all default settings +umap.defaults + +# create a new settings object with n.neighbors set to 5 +custom.settings = umap.defaults +custom.settings$n.neighbors = 5 +custom.settings + } \keyword{datasets} diff --git a/man/umap.error.Rd b/man/umap.error.Rd index 7d9b7fd..939b16c 100644 --- a/man/umap.error.Rd +++ b/man/umap.error.Rd @@ -12,3 +12,4 @@ umap.error(...) \description{ stop execution with a custom error message } +\keyword{internal} diff --git a/man/umap.naive.Rd b/man/umap.naive.Rd index 5174d0a..777924f 100644 --- a/man/umap.naive.Rd +++ b/man/umap.naive.Rd @@ -18,3 +18,4 @@ list, one element of which is matrix with embedding coordinates This implementation is called naive because it is a rather straightforward translation of the original python code. 
} +\keyword{internal} diff --git a/man/umap.prep.input.Rd b/man/umap.prep.input.Rd index f4f4562..4f2f3f4 100644 --- a/man/umap.prep.input.Rd +++ b/man/umap.prep.input.Rd @@ -17,3 +17,4 @@ d as matrix \description{ Prep primary input as a data matrix } +\keyword{internal} diff --git a/man/umap.python.Rd b/man/umap.python.Rd index 61a3301..717a24d 100644 --- a/man/umap.python.Rd +++ b/man/umap.python.Rd @@ -17,3 +17,4 @@ list, one element of which is matrix with embedding coordinates \description{ Create a umap embedding using a python package } +\keyword{internal} diff --git a/man/umap.small.Rd b/man/umap.small.Rd index 7588c7e..400ab1f 100644 --- a/man/umap.small.Rd +++ b/man/umap.small.Rd @@ -17,3 +17,4 @@ list, one element of which is matrix with embedding coordinates \description{ Create an embedding object compatible with package umap for very small inputs } +\keyword{internal} diff --git a/man/vector.norm.Rd b/man/vector.norm.Rd index f5ec645..d63c40f 100644 --- a/man/vector.norm.Rd +++ b/man/vector.norm.Rd @@ -15,3 +15,4 @@ numeric, vector norm \description{ Compute vector norm } +\keyword{internal} diff --git a/man/vectormultiplication.coo.Rd b/man/vectormultiplication.coo.Rd index c61aa5f..33df8c4 100644 --- a/man/vectormultiplication.coo.Rd +++ b/man/vectormultiplication.coo.Rd @@ -19,3 +19,4 @@ new vector x*v \description{ Matrix multiplication of a coo matrix with a vector } +\keyword{internal} diff --git a/src/clip4.cpp b/src/clip4.cpp index 249c59b..1b382d3 100644 --- a/src/clip4.cpp +++ b/src/clip4.cpp @@ -5,6 +5,7 @@ using namespace Rcpp; //' perform a compound transformation on a vector, including clipping //' +//' @keywords internal //' @param x numeric vector //' @param inner numeric constant //' @param outer numeric constan diff --git a/src/distances.cpp b/src/distances.cpp index 5a6c60b..49aa73a 100644 --- a/src/distances.cpp +++ b/src/distances.cpp @@ -11,6 +11,7 @@ using namespace Rcpp; //' compute Euclidean distance between two vectors //' 
+//' @keywords internal //' @param x numeric vector //' @param y numeric vector //' @@ -30,6 +31,7 @@ double dEuclidean(NumericVector &x, NumericVector y) { //' compute Euclidean distances //' +//' @keywords internal //' @param m matrix with raw data //' //' @return dEuclidean norms between first row and all other rows @@ -49,6 +51,7 @@ NumericVector mdEuclidean(NumericMatrix &m) { //' compute Manhattan distance between two vectors //' +//' @keywords internal //' @param x numeric vector //' @param y numeric vector //' @@ -73,6 +76,7 @@ double dManhattan(NumericVector &x, NumericVector y) { //' compute Manhattan distances //' +//' @keywords internal //' @param m matrix with raw data //' //' @return dManhattan norms between origin and targets @@ -97,6 +101,7 @@ NumericVector mdManhattan(NumericMatrix &m) { //' Important: this function assumes that data has been centered //' i.e. that mean(x) = mean(y) = 0 //' +//' @keywords internal //' @param x numeric vector //' @param y numeric vector //' @@ -120,6 +125,7 @@ double dCenteredPearson(NumericVector &x, NumericVector y) { //' compute pearson correlation distances //' +//' @keywords internal //' @param m matrix with raw data //' //' @return dCenteredPearson norms between first row and all other rows @@ -141,6 +147,7 @@ NumericVector mdCenteredPearson(NumericMatrix &m) { //' //' Note: values output from this function do not satisfy the triangle inequality //' +//' @keywords internal //' @param x numeric vector //' @param y numeric vector //' @@ -164,6 +171,7 @@ double dCosine(NumericVector &x, NumericVector y) { //' compute cosine distances //' +//' @keywords internal //' @param m matrix with raw data //' //' @return dCosine norms between first row and all other rows diff --git a/vignettes/umap.Rmd b/vignettes/umap.Rmd index 17c87bb..8af6e97 100644 --- a/vignettes/umap.Rmd +++ b/vignettes/umap.Rmd @@ -146,7 +146,7 @@ plot.iris(iris.umap.2, iris.labels, main="Another UMAP visualization of the Iris dataset (different 
seed)") ``` -The result is slightly different due to a new instanciation of the random number generator. +The result is slightly different due to a new instantiation of the random number generator. @@ -175,7 +175,7 @@ The second available implementation is a wrapper for a python module with the sa iris.umap.4 = umap(iris.data, method="python") ``` -This command has several dependencies. To make it work, you must have the `reticulate` package installed and loaded (use `install.packages("reticulate")` and `library(reticulate)`). Furthermore, you must have the `umap` python package installed (see the [package repo](https://github.com/lmcinnes/umap) for instructions). If either of these components is not available, the above command will display an error message. +This command has several dependencies. To make it work, you must have the `reticulate` package installed and loaded (use `install.packages("reticulate")` and `library(reticulate)`). Furthermore, you must have the `umap-learn` python package installed (see the [package repo](https://github.com/lmcinnes/umap) for instructions). If either of these components is not available, the above command will display an error message. Note that it will not be possible to produce exactly the same output from the two implementations due to inequivalent random number generators in R and python.