From 99758b3d908243539441eae7a8aca36c02a7345f Mon Sep 17 00:00:00 2001 From: Michael Hahsler Date: Fri, 28 Jun 2024 17:18:53 -0500 Subject: [PATCH] CRAN submission. --- DESCRIPTION | 7 ++++--- NEWS.md | 13 +++++++------ R/hdbscan.R | 1 - README.Rmd | 4 ++-- README.md | 6 +++--- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 782311e..3436616 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,11 @@ Package: dbscan -Version: 1.1-12-1 -Date: 2023-xx-xx +Version: 1.2-0 +Date: 2024-06-28 Title: Density-Based Spatial Clustering of Applications with Noise (DBSCAN) and Related Algorithms Authors@R: c(person("Michael", "Hahsler", role = c("aut", "cre", "cph"), - email = "mhahsler@lyle.smu.edu"), + email = "mhahsler@lyle.smu.edu", + comment = c(ORCID = "0000-0003-2716-1405")), person("Matthew", "Piekenbrock", role = c("aut", "cph")), person("Sunil", "Arya", role = c("ctb", "cph")), person("David", "Mount", role = c("ctb", "cph"))) diff --git a/NEWS.md b/NEWS.md index 4df46be..d3d7c4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,18 +1,19 @@ -# dbscan 1.1-12-1 (2024-xx-xx) +# dbscan 1.2-0 (2024-06-28) ## New Features +* dbscan now has tidymodels tidiers (glance, tidy, augment). * kNNdistplot can now plot a range of k/minPts values. * added stats::nobs methods for the clusterings. * kNN and frNN now contains the used distance metric. -* dbscan has now tidymodels tidiers (glance, tidy, augment). ## Changes -* Reorganized the C++ source code. -* README now used bibtex. +* dbscan component dist was renamed to metric. * Removed redundant sort in kNNdistplot (reported by Natasza Szczypien). -* Refactor: use more performant anyNA(x) instead of any(is.na(x)) (by m-muecke). +* Refactoring: use more performant anyNA(x) instead of any(is.na(x)) + and many more (by m-muecke). +* Reorganized the C++ source code. +* README now uses bibtex. * Tests use now testthat edition 3 (m-muecke). -* dbscan component dist was renamed to metric. 
# dbscan 1.1-12 (2023-11-28) diff --git a/R/hdbscan.R b/R/hdbscan.R index 4eb097c..2f65407 100644 --- a/R/hdbscan.R +++ b/R/hdbscan.R @@ -178,7 +178,6 @@ hdbscan <- function(x, ## Generate membership 'probabilities' using core distance as the measure of density prob <- rep(0, length(cl)) for (cid in sl) { - ccl <- res[[as.character(cid)]] max_f <- max(coredist[which(cl == cid)]) pr <- (max_f - coredist[which(cl == cid)]) / max_f prob[cl == cid] <- pr diff --git a/README.Rmd b/README.Rmd index f35727e..def6f03 100644 --- a/README.Rmd +++ b/README.Rmd @@ -103,13 +103,13 @@ library(tidyverse) db <- x %>% dbscan(eps = .42, minPts = 5) ``` -Get cluster statistics +Get cluster statistics as a tibble ```{r tidyverse2} tidy(db) ``` -Visualize the clustering with ggplot2 +Visualize the clustering with ggplot2 (use an x for noise points) ```{r tidyverse3} augment(db, x) %>% ggplot(aes(x = Petal.Length, y = Petal.Width)) + diff --git a/README.md b/README.md index 9f103a3..7fae162 100644 --- a/README.md +++ b/README.md @@ -238,7 +238,7 @@ db <- x %>% dbscan(eps = 0.42, minPts = 5) ``` -Get cluster statistics +Get cluster statistics as a tibble ``` r tidy(db) @@ -246,13 +246,13 @@ tidy(db) ## # A tibble: 4 × 3 ## cluster size noise - ## + ## ## 1 0 29 TRUE ## 2 1 48 FALSE ## 3 2 37 FALSE ## 4 3 36 FALSE -Visualize the clustering with ggplot2 +Visualize the clustering with ggplot2 (use an x for noise points) ``` r augment(db, x) %>%