diff --git a/DESCRIPTION b/DESCRIPTION index 3f4686e..fbce070 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,7 +57,10 @@ Suggests: magrittr, covr, picante, - withr + withr, + fs, + readr, + usethis Config/testthat/edition: 3 Depends: R (>= 4.1.0) diff --git a/codemeta.json b/codemeta.json index 51176bb..af1775f 100644 --- a/codemeta.json +++ b/codemeta.json @@ -213,6 +213,42 @@ "url": "https://cran.r-project.org" }, "sameAs": "https://CRAN.R-project.org/package=withr" + }, + { + "@type": "SoftwareApplication", + "identifier": "fs", + "name": "fs", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=fs" + }, + { + "@type": "SoftwareApplication", + "identifier": "readr", + "name": "readr", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=readr" + }, + { + "@type": "SoftwareApplication", + "identifier": "usethis", + "name": "usethis", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=usethis" } ], "softwareRequirements": { @@ -349,7 +385,7 @@ }, "SystemRequirements": null }, - "fileSize": "7822.482KB", + "fileSize": "9237.93KB", "citation": [ { "@type": "SoftwareSourceCode", diff --git a/data-raw/acacia.R b/data-raw/acacia.R index ddaf334..bf3c7e2 100644 --- a/data-raw/acacia.R +++ b/data-raw/acacia.R @@ -4,13 +4,17 @@ library(janitor) library(ape) library(tidyverse) library(assertr) +library(assertthat) library(here) # Load community ---- -# acacia_sites_by_spp.csv downloaded from github -# 
https://github.com/shawnlaffan/biodiverse/tree/73522b74e52a5fb77ae5cfc2e90010350a3abf70/etc/experiments/independent_swaps -acacia_comm <- read_csv(here("data-raw/acacia_sites_by_spp.csv"), col_types = cols(.default = col_character())) %>% +# Community data: Acacia community data from Mishler et al. 2014 +# For more info, see data-raw/CANAPE_Acacia/README.md +acacia_comm <- read_csv( + here("data-raw/CANAPE_Acacia/sites_by_spp.csv"), + col_types = cols(.default = col_character()) +) %>% clean_names() %>% select(-axis_0, -axis_1) %>% mutate(across(-element, as.numeric)) %>% @@ -19,11 +23,20 @@ acacia_comm <- read_csv(here("data-raw/acacia_sites_by_spp.csv"), col_types = co # Load tree ---- -# Need to do extra parsing of nexus file, since taxa names include spaces, parentheses, etc. +# Need to do extra parsing of nexus file, since taxa names include spaces, +# parentheses, etc. temp_phy <- tempfile("temp_phy.tre") -acacia_nexus_raw <- readr::read_lines(here("data-raw/1_1363828941_Acacia.nexorg")) +# Read in tree nexus file from treebase +acacia_nexus_raw <- readr::read_lines( + here("data-raw/1_1363828941_Acacia.nexorg") +) +# The nexus file has one block for taxa names and +# one block for the tree. +# The tips of the tree are labeled with number codes, which +# correspond to taxon names. 
+# We want to relabel the tree so the tips are the taxon names, not numbers acacia_nexus_raw %>% magrittr::extract(str_detect(., "TREE tree_1 = ")) %>% unlist() %>% @@ -44,26 +57,51 @@ taxa_end <- acacia_nexus_raw %>% which() %>% magrittr::subtract(1) -acacia_taxa <- acacia_nexus_raw[taxa_start:taxa_end] %>% - readr::read_table(col_names = FALSE) %>% - separate(X1, c("number", "name"), sep = " ", extra = "merge") %>% +# Write taxa block to temporary file to read in with read_tsv +temptable <- tempfile("table.tsv") + +writeLines(acacia_nexus_raw[taxa_start:taxa_end], temptable) + +# Make table of taxon number and name for all Acacia plus outgroups +acacia_with_og <- readr::read_tsv( + temptable, + col_names = FALSE, quote = "", + col_types = cols(.default = col_character()) +) %>% + separate(X3, c("number", "name"), sep = " ", extra = "merge") %>% + select(number, name) + +unlink(temptable) + +# Format table of ingroup taxa (named with specific epithet only) +acacia_taxa <- + acacia_with_og %>% + # drop outgroups filter(str_detect(name, "Acacia")) %>% mutate(name = str_replace_all(name, " ", "_")) %>% extract(name, "species", "(Acacia_[^_]+)", remove = FALSE) %>% - mutate(species = str_remove_all(species, "Acacia_") %>% str_remove_all(",")) %>% + mutate( + species = str_remove_all(species, "Acacia_") %>% + str_remove_all(",") + ) %>% assert(not_na, species) %>% assert(is_uniq, species) %>% select(-name) -og_taxa <- acacia_nexus_raw[taxa_start:taxa_end] %>% - readr::read_table(col_names = FALSE) %>% - separate(X1, c("number", "name"), sep = " ", extra = "merge") %>% +# Format table of outgroup taxa (named with genus and specific epithet) +og_taxa <- + acacia_with_og %>% filter(str_detect(name, "Pararchidendron|Paraserianthes")) %>% mutate(name = str_replace_all(name, " ", "_")) %>% mutate(name = str_remove_all(name, "'")) %>% separate(name, c("genus", "epithet"), sep = "_", extra = "drop") %>% unite("species", genus, epithet) +# Fix one label to match between comm and 
phy: +# 'clunies' in comm, but 'clunies-rossiae' in phy +acacia_taxa$species[acacia_taxa$species == "clunies-rossiae"] <- "clunies" + +# Make tibble of tip labels matching numbers in tree new_tips <- tibble(number = acacia_phy$tip.label) %>% left_join(bind_rows(acacia_taxa, og_taxa), by = "number") %>% @@ -77,4 +115,17 @@ acacia <- list( comm = acacia_comm ) +# Final checks +# - should be 510 tips in tree +assert_that((Ntip(acacia$phy) == 510)) +# - All names besides outgroups should match +assert_that( + isTRUE( + all.equal( + sort(colnames(acacia_comm)), + sort(drop.tip(acacia$phy, og_taxa$species)$tip.label) + ) + ) +) + usethis::use_data(acacia, overwrite = TRUE) diff --git a/data/acacia.rda b/data/acacia.rda index b754df4..43cd845 100644 Binary files a/data/acacia.rda and b/data/acacia.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index 4771caf..c1d5f11 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -62,6 +62,7 @@ eval exhuastive familyName fileSize +fs fsf funder getRversion @@ -147,6 +148,7 @@ rcmdcheck Rds README readme +readr regionalization relatedLink releaseNotes @@ -196,6 +198,7 @@ tidyverse ubuntu Ulrich unifrac +usethis vigenette VignetteBuilder Wataru