diff --git a/.Rbuildignore b/.Rbuildignore index fe0c83d..f8ee74b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,6 @@ ^vignettes/Complex_shapes\.Rmd$ ^vignettes/Runtime_comparison\.Rmd$ ^\.travis\.yml$ +^Makefile$ +^CONTRIBUTORS\.md$ +^README\.Rmd$ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index ffca7b1..b73eab6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,12 +3,11 @@ Type: Package Title: Permutation Distribution Clustering Version: 1.0.4 Date: 2020-02-02 -Author: Andreas M. Brandmaier [aut, cre] Authors@R: c(person("Andreas M. Brandmaier", email="andy@brandmaier.de", role=c("aut","cre"), comment = c(ORCID = "0000-0001-8765-6982"))) -Maintainer: Andreas M. Brandmaier +Maintainer: Andreas M. Brandmaier Description: Permutation Distribution Clustering is a clustering method for time series. Dissimilarity of time series is formalized as the divergence between their permutation distributions. The permutation distribution was proposed as measure of the complexity of a time series. 
License: GPL-3 Imports: stats, @@ -21,4 +20,5 @@ Suggests: knitr, rmarkdown VignetteBuilder: knitr -URL: http://brandmaier.github.io/pdc, https://github.com/brandmaier/pdc +URL: https://brandmaier.github.io/pdc/, https://github.com/brandmaier/pdc +RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index e1fcba7..e660a21 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,7 +40,7 @@ export("convert.image") # # load shared object # -useDynLib(pdc) +useDynLib(pdc, .registration = TRUE) # # imports from default (but not base) packages @@ -53,4 +53,5 @@ importFrom("graphics", "abline", "contour", "image", "lines", "par", "strwidth", "text") importFrom("stats", "as.dendrogram", "as.dist", "cmdscale", "hclust", "pchisq") +importFrom("stats", "qnorm") importFrom("utils", "str") diff --git a/R/codebook.entropy.R b/R/codebook.entropy.R index 289ad24..6f77187 100644 --- a/R/codebook.entropy.R +++ b/R/codebook.entropy.R @@ -1,8 +1,8 @@ codebook.entropy <- -function(data, m, t) +function(data, m, t, normalize_by_observed=FALSE) { - ent <- entropy(codebook(data,m, t)) + ent <- entropy(codebook(data, m, t),normalize_by_observed=normalize_by_observed) return( ent); } diff --git a/R/entropy.R b/R/entropy.R index 38db249..0c1dd2f 100644 --- a/R/entropy.R +++ b/R/entropy.R @@ -1,8 +1,9 @@ entropy <- -function(dist) +function(dist, normalize_by_observed=FALSE) { n <- length(dist) dist <- dist[dist!=0] + if (normalize_by_observed) n <- length(dist) if (length(dist)==1) return (0); return(-sum(dist*log(dist))/log(n)); diff --git a/R/entropyHeuristic.R b/R/entropyHeuristic.R index 7d9c926..f96738a 100644 --- a/R/entropyHeuristic.R +++ b/R/entropyHeuristic.R @@ -6,7 +6,7 @@ function(X, m.min=3, m.max=7, t.min=1, t.max=1) } entropyHeuristic <- -function(X, m.min=3, m.max=7, t.min=1, t.max=1) +function(X, m.min=3, m.max=7, t.min=1, t.max=1, normalize_by_observed=FALSE) { X <- as.matrix(X) @@ -22,7 +22,7 @@ function(X, m.min=3, m.max=7, t.min=1, t.max=1) { ent[k,1] <- j ent[k,2] <- i - ent[k,3] 
<- mean(apply(FUN=codebook.entropy, MARGIN=2, X, m=i,t=j)) + ent[k,3] <- mean(apply(FUN=codebook.entropy, MARGIN=2, X, m=i,t=j, normalize_by_observed=normalize_by_observed)) k <- k+1 } best <- which.min(ent[,3]); diff --git a/R/pdclust.R b/R/pdclust.R index 80c2bca..c73b3b3 100644 --- a/R/pdclust.R +++ b/R/pdclust.R @@ -1,25 +1,23 @@ pdclust <- function(X, m=NULL, t=NULL, divergence=symmetricAlphaDivergence, clustering.method="complete") { - user.m <- !is.null(m); - user.t <- !is.null(t); + user.m <- !is.null(m) + user.t <- !is.null(t) if ((is.null(m)) && (is.null(t))) { - m <- entropyHeuristic(X)$m; + m <- entropyHeuristic(X)$m } if (is.null(m)) { - m <- entropyHeuristic(X, t.min=t, t.max=t)$m; + m <- entropyHeuristic(X, t.min=t, t.max=t)$m } if (is.null(t)) { - t <- 1; + t <- 1 } - - # calculate divergence matrix - D <- pdcDist(X,m,t,divergence); + D <- pdcDist(X,m,t,divergence) # start hierarchical clustering if (clustering.method == "complete") { diff --git a/R/udcDist.R b/R/udcDist.R index a220c52..0d8b45c 100644 --- a/R/udcDist.R +++ b/R/udcDist.R @@ -3,8 +3,9 @@ udConvert <- function(x) { ifelse(sign(diff(x))==1,1,0) } udCodeword <- function(x, m) {sum( 2^(1:(m-1)-1) * udConvert(x) )+1} udComplexity <- function(x, compression.type="zip") { - return(length(memCompress(paste0(udConvert(x),collapse = "")), - type=compression.type)/length(x)) + return(length( + memCompress(paste0(udConvert(x),collapse = ""),type=compression.type) + )/length(x)) } udcDist <- function(X, compression.type="zip") { diff --git a/demo/00Index b/demo/00Index new file mode 100644 index 0000000..8dc1320 --- /dev/null +++ b/demo/00Index @@ -0,0 +1 @@ +paired.tseries Demonstrate clustering of pairs of time series diff --git a/man/codebook.Rd b/man/codebook.Rd index 489f129..ded0373 100644 --- a/man/codebook.Rd +++ b/man/codebook.Rd @@ -7,7 +7,7 @@ Codebook A codebook contains the permutation distribution of a time series. 
} \usage{ -codebook(x, m = 3, t = 1, use.fast=TRUE, normalized = TRUE, codeword_func = codeword) +codebook(x, m = 3, t = 1, use.fast=TRUE, normalized = TRUE, codeword_func = NULL) } \arguments{ \item{x}{ @@ -22,7 +22,7 @@ The embedding dimension. \item{normalized}{Normalize codebook such that it is a probability distribution.} -\item{codeword_func}{Function to compute codewords.} +\item{codeword_func}{Function to compute codewords. If NULL, the default internal function codeword is used.} } \details{ The length of a codebook is the factorial of the embedding dimension. The elements of the diff --git a/man/entropy.heuristic.Rd b/man/entropy.heuristic.Rd index 4af2675..a0fe652 100644 --- a/man/entropy.heuristic.Rd +++ b/man/entropy.heuristic.Rd @@ -16,7 +16,7 @@ Heuristic (MinE) automatically chooses an embedding dimension with an optimal representational entropy as proxy for representational power. } \usage{ -entropyHeuristic(X, m.min=3, m.max=7, t.min = 1, t.max = 1) +entropyHeuristic(X, m.min=3, m.max=7, t.min = 1, t.max = 1, normalize_by_observed = FALSE) \method{print}{mine}(x, \dots) \method{summary}{mine}(object, \dots) @@ -32,6 +32,8 @@ entropyHeuristic(X, m.min=3, m.max=7, t.min = 1, t.max = 1) \item{m.max}{Maximum embedding dimension} \item{t.min}{Minimum time-delay} \item{t.max}{Maximum time-delay} +\item{normalize_by_observed}{Boolean. If false, entropy is normalized by dividing by the logarithm of the number of all possible patterns. 
If true, +it is divided by the logarithm of the number of observed patterns.} \item{\dots}{Further arguments for the generic print, summary, and plot method.} \item{normalize}{Normalize values to range [0;1].} diff --git a/vignettes/Getting_started.Rmd b/vignettes/Getting_started.Rmd index 3cd2157..86071ca 100644 --- a/vignettes/Getting_started.Rmd +++ b/vignettes/Getting_started.Rmd @@ -4,7 +4,7 @@ author: "Andreas Brandmaier" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Vignette Title} + %\VignetteIndexEntry{Getting started} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} ---