Skip to content

Commit

Permalink
Update acacia data
Browse files Browse the repository at this point in the history
use community data based on dryad dataset (https://doi.org/10.5061/dryad.dv4qk) processed with biodiverse
  • Loading branch information
joelnitta committed Aug 31, 2022
1 parent d1a4a1f commit 78313fb
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 14 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ Suggests:
magrittr,
covr,
picante,
withr
withr,
fs,
readr,
usethis
Config/testthat/edition: 3
Depends:
R (>= 4.1.0)
Expand Down
38 changes: 37 additions & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,42 @@
"url": "https://cran.r-project.org"
},
"sameAs": "https://CRAN.R-project.org/package=withr"
},
{
"@type": "SoftwareApplication",
"identifier": "fs",
"name": "fs",
"provider": {
"@id": "https://cran.r-project.org",
"@type": "Organization",
"name": "Comprehensive R Archive Network (CRAN)",
"url": "https://cran.r-project.org"
},
"sameAs": "https://CRAN.R-project.org/package=fs"
},
{
"@type": "SoftwareApplication",
"identifier": "readr",
"name": "readr",
"provider": {
"@id": "https://cran.r-project.org",
"@type": "Organization",
"name": "Comprehensive R Archive Network (CRAN)",
"url": "https://cran.r-project.org"
},
"sameAs": "https://CRAN.R-project.org/package=readr"
},
{
"@type": "SoftwareApplication",
"identifier": "usethis",
"name": "usethis",
"provider": {
"@id": "https://cran.r-project.org",
"@type": "Organization",
"name": "Comprehensive R Archive Network (CRAN)",
"url": "https://cran.r-project.org"
},
"sameAs": "https://CRAN.R-project.org/package=usethis"
}
],
"softwareRequirements": {
Expand Down Expand Up @@ -349,7 +385,7 @@
},
"SystemRequirements": null
},
"fileSize": "7822.482KB",
"fileSize": "9237.93KB",
"citation": [
{
"@type": "SoftwareSourceCode",
Expand Down
75 changes: 63 additions & 12 deletions data-raw/acacia.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@ library(janitor)
library(ape)
library(tidyverse)
library(assertr)
library(assertthat)
library(here)

# Load community ----

# acacia_sites_by_spp.csv downloaded from github
# https://github.com/shawnlaffan/biodiverse/tree/73522b74e52a5fb77ae5cfc2e90010350a3abf70/etc/experiments/independent_swaps
acacia_comm <- read_csv(here("data-raw/acacia_sites_by_spp.csv"), col_types = cols(.default = col_character())) %>%
# Community data: Acacia community data from Mishler et al. 2014
# For more info, see data-raw/CANAPE_Acacia/README.md
acacia_comm <- read_csv(
here("data-raw/CANAPE_Acacia/sites_by_spp.csv"),
col_types = cols(.default = col_character())
) %>%
clean_names() %>%
select(-axis_0, -axis_1) %>%
mutate(across(-element, as.numeric)) %>%
Expand All @@ -19,11 +23,20 @@ acacia_comm <- read_csv(here("data-raw/acacia_sites_by_spp.csv"), col_types = co

# Load tree ----

# Need to do extra parsing of nexus file, since taxa names include spaces, parentheses, etc.
# Need to do extra parsing of nexus file, since taxa names include spaces,
# parentheses, etc.
temp_phy <- tempfile("temp_phy.tre")

acacia_nexus_raw <- readr::read_lines(here("data-raw/1_1363828941_Acacia.nexorg"))
# Read in tree NEXUS file from TreeBASE
acacia_nexus_raw <- readr::read_lines(
here("data-raw/1_1363828941_Acacia.nexorg")
)

# The nexus file has one block for taxa names and
# one block for the tree.
# The tips of the tree are labeled with number codes, which
# correspond to taxon names.
# We want to relabel the tree so the tips are the taxon names, not numbers
acacia_nexus_raw %>%
magrittr::extract(str_detect(., "TREE tree_1 = ")) %>%
unlist() %>%
Expand All @@ -44,26 +57,51 @@ taxa_end <- acacia_nexus_raw %>%
which() %>%
magrittr::subtract(1)

acacia_taxa <- acacia_nexus_raw[taxa_start:taxa_end] %>%
readr::read_table(col_names = FALSE) %>%
separate(X1, c("number", "name"), sep = " ", extra = "merge") %>%
# Write taxa block to temporary file to read in with read_tsv
temptable <- tempfile("table.tsv")

writeLines(acacia_nexus_raw[taxa_start:taxa_end], temptable)

# Make table of taxon number and name for all Acacia plus outgroups
acacia_with_og <- readr::read_tsv(
temptable,
col_names = FALSE, quote = "",
col_types = cols(.default = col_character())
) %>%
separate(X3, c("number", "name"), sep = " ", extra = "merge") %>%
select(number, name)

unlink(temptable)

# Format table of ingroup taxa (named with specific epithet only)
acacia_taxa <-
acacia_with_og %>%
# drop outgroups
filter(str_detect(name, "Acacia")) %>%
mutate(name = str_replace_all(name, " ", "_")) %>%
extract(name, "species", "(Acacia_[^_]+)", remove = FALSE) %>%
mutate(species = str_remove_all(species, "Acacia_") %>% str_remove_all(",")) %>%
mutate(
species = str_remove_all(species, "Acacia_") %>%
str_remove_all(",")
) %>%
assert(not_na, species) %>%
assert(is_uniq, species) %>%
select(-name)

og_taxa <- acacia_nexus_raw[taxa_start:taxa_end] %>%
readr::read_table(col_names = FALSE) %>%
separate(X1, c("number", "name"), sep = " ", extra = "merge") %>%
# Format table of outgroup taxa (named with genus and specific epithet)
og_taxa <-
acacia_with_og %>%
filter(str_detect(name, "Pararchidendron|Paraserianthes")) %>%
mutate(name = str_replace_all(name, " ", "_")) %>%
mutate(name = str_remove_all(name, "'")) %>%
separate(name, c("genus", "epithet"), sep = "_", extra = "drop") %>%
unite("species", genus, epithet)

# Fix one label to match between comm and phy:
# 'clunies' in comm, but 'clunies-rossiae' in phy
acacia_taxa$species[acacia_taxa$species == "clunies-rossiae"] <- "clunies"

# Make tibble of tip labels matching numbers in tree
new_tips <-
tibble(number = acacia_phy$tip.label) %>%
left_join(bind_rows(acacia_taxa, og_taxa), by = "number") %>%
Expand All @@ -77,4 +115,17 @@ acacia <- list(
comm = acacia_comm
)

# Final checks
# - should be 510 tips in tree
assert_that((Ntip(acacia$phy) == 510))
# - All names besides outgroups should match
assert_that(
isTRUE(
all.equal(
sort(colnames(acacia_comm)),
sort(drop.tip(acacia$phy, og_taxa$species)$tip.label)
)
)
)

usethis::use_data(acacia, overwrite = TRUE)
Binary file modified data/acacia.rda
Binary file not shown.
3 changes: 3 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ eval
exhuastive
familyName
fileSize
fs
fsf
funder
getRversion
Expand Down Expand Up @@ -147,6 +148,7 @@ rcmdcheck
Rds
README
readme
readr
regionalization
relatedLink
releaseNotes
Expand Down Expand Up @@ -196,6 +198,7 @@ tidyverse
ubuntu
Ulrich
unifrac
usethis
vigenette
VignetteBuilder
Wataru
Expand Down

0 comments on commit 78313fb

Please sign in to comment.