initial commit of scripts

jennybigman · Nov 2, 2022 · 631179c · 631179c
1 parent c7d0ef1
commit 631179c
Show file tree

Hide file tree

Showing 7 changed files with 1,193 additions and 0 deletions.
diff --git a/05 - calculating gill area index three different ways.R b/05 - calculating gill area index three different ways.R
@@ -0,0 +1,66 @@
+# 05 - calculating gill area index three different ways (empirically est d, predicted d from Pauly 1981, constant d)
+
+	# Question 4 in MS
+
+	# here, we are working with the raw dataset, but to estimate gill area index, we need to take a mean of 
+	# gill surface area and body mass 
+
+	mean_dat <- RawGSA8_phylo %>% 
+						  group_by(Binomial, mean_gperf_simple) %>%
+						  summarise(meanGSAcm2 = mean(GSAcm2),
+						  					meanMassG = mean(MassG),
+						  					mean_Winfinity = mean(mean_Winfinity), # need to keep Winf in dataset
+						  					Log_Winfinity = log10(mean_Winfinity))
+
+	#### calculate gill area index using empirically estimated d ####
+	#get_prior(LogGSAcm2 ~ LogMassG,
+	#					data = RawGSA8_phylo,
+	#					family = gaussian())
+	#
+	#GSA_mass_fit <- brm(LogGSAcm2 ~ LogMassG * Binomial,
+	#										family = gaussian(),
+	#										data = RawGSA8_phylo,
+	#										prior = c(
+	#											prior(student_t(3, 3, 3), class = Intercept),
+	#											prior(student_t(3, 5, 10), class = b),
+	#											prior(student_t(3, 0, 2.5), class = sigma)),
+	#										control = list(max_treedepth = 15),
+	#										chains = 4, cores = 4, iter = 5000, warmup = 1000)
+	#
+	#save(GSA_mass_fit, file = here("./output/GSA_mass_fit.rds"))
+	load(file = here("./output/GSA_mass_fit.rds"))
+
+	post_GSA_mass <- as.data.frame(GSA_mass_fit)
+
+	intercept <- post_GSA_mass[]
+	post_GSA_mass_mean <- stack(apply(post_GSA_mass, 2, mean)) %>% rename(mean = values)
+
+
+	#### calculate gill area index using d = 0.8 ####
+
+	GAI_formula <- function(G, W){
+	  GAI = G / (W^0.8)
+	  return(GAI)
+	}
+
+	mean_dat <- mean_dat %>%
+	            rowwise() %>%
+	            mutate(GAI_setd = GAI_formula(G = meanGSAcm2, W = meanMassG))
+
+
+	#### calculate gill area index using d predicted from relationship in Pauly (1981) ####
+
+	d_formula <- function(x){
+	  d = 0.6742 + 0.03574 * x
+	  return(d)
+	}
+
+	GAI_formula_pred <- function(G, W, x){
+	  GAI = G / (W^x)
+	  return(GAI)
+	}
+
+	mean_dat <- mean_dat %>%
+	            rowwise() %>%
+	            mutate(pred_d = d_formula(x = Log_Winfinity),
+	            			 GAI_predd = GAI_formula_pred(G = meanGSAcm2, W = meanMassG, x = pred_d))
diff --git a/revisiting-the-GOLT.Rproj b/revisiting-the-GOLT.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: No
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/scripts/01 - load each time-ms.R b/scripts/01 - load each time-ms.R
@@ -0,0 +1,187 @@
+# 01-load all data
+
+	# load libraries
+	library(ape)
+	library(beepr)
+	library(broom)
+	library(brms)
+	library(car)
+	library(cowplot)
+	library(data.table)
+	library(dplyr)
+	library(forcats)
+	library(forestmangr)
+	#library(geiger)
+	library(ggeffects)
+	library(ggplot2)
+	library(here)
+	library(lme4)
+#	library(lmodel2)
+	library(loo)
+	library(magick)
+	library(MASS)
+	#library(MCMCglmm)
+	library(patchwork)
+	library(phangorn)
+	library(phytools)
+	library(purrr)
+	library(readr)
+	library(reshape2)
+#	library(rfishbase)
+	library(rstan)
+	library(stringr)
+	library(tidyr)
+	library(tidyverse)
+#	library(ggdist)
+	library(quantreg)
+
+
+
+	`%notin%` <- Negate(`%in%`)
+
+	# set options for stan
+	rstan_options(auto_write = TRUE)
+	options(mc.cores = parallel::detectCores())
+
+	# helpful functions
+	len <- function(y) {
+	  z <-length(unique(y))
+	  return(z)
+	}
+
+	# load gill surface area & growth data for 132 species
+	GSA_growth_dat <- read.csv(here("data/GSA_Growth_132SP_FINAL.csv"),
+                             header = TRUE, stringsAsFactors = FALSE, strip.white = TRUE)  %>%
+                  	dplyr::select(., -X, -X.1, -X.2)
+
+	# change "UNK" to mean in datasheet
+	GSA_growth_dat$RawOrMean[GSA_growth_dat$RawOrMean == "UNK"] <- "mean"
+
+	# transformations and standardizations
+	GSA_growth_dat <- GSA_growth_dat %>%
+                    mutate(GSAcm2 = case_when(
+                              GSA_units %in% "cm2" ~ GSA,
+                              GSA_units %in% "mm2" ~ GSA/100))
+
+	GSA_growth_dat <- GSA_growth_dat %>%
+	                  mutate(MassG = case_when(
+	                              MassUnits %in% "grams" ~ Mass))
+
+	GSA_growth_dat <- GSA_growth_dat %>%
+	                  mutate(LogGSAcm2 = log10(GSAcm2),
+	                         LogMassG = log10(Mass))
+
+	# center body mass
+	body_mass_avg <- GSA_growth_dat %>% group_by(Binomial) %>% summarise(mean_mass = mean(LogMassG))
+
+	mean(body_mass_avg$mean_mass)
+
+	GSA_growth_dat$LogCenterMassG <- GSA_growth_dat$LogMassG - (log10(300))
+
+	# if a species has raw GSA data from more than one study, I need to only include that dataset
+	# that has a larger range of body mass
+	GSA_growth_dat$SpeciesStudy <- paste(GSA_growth_dat$Binomial, GSA_growth_dat$Source)
+
+	# add a column for the number of determinations 
+	GSA_growth_dat_study_sum <- GSA_growth_dat %>% 
+	  count(Binomial, SpeciesStudy, RawOrMean)
+
+	GSA_growth_dat_study_sum <- GSA_growth_dat_study_sum %>%
+	                            arrange(Binomial, desc(n), .by_group = TRUE) %>%
+	                            distinct(Binomial, .keep_all = TRUE)
+
+	to_keep <- GSA_growth_dat_study_sum$SpeciesStudy
+
+	GSA_growth_dat <- GSA_growth_dat %>% filter(., SpeciesStudy %in% to_keep)
+
+	# dataset for species that have at least 8 individual estimates per species
+	RawGSA <- GSA_growth_dat %>%
+		filter(RawOrMean == "raw")
+
+	RawGSA8 <- RawGSA %>%
+		group_by(Binomial) %>%
+	  filter(n()>=8) 
+
+	len(RawGSA8$Binomial)
+
+	# load phylogeny
+	fish_elasmo_supertree <- read.tree(file = here("./data/fish_elasmo_supertree.tre"))
+	len(fish_elasmo_supertree$tip.label)
+	# 125 species, missing 7 teleost species from the phylogeny (that's ok, just have to exclude them from )
+
+	# which species are on the tree?
+	Species <- lapply(strsplit(as.character(RawGSA8$Binomial), "\\ "), "[", 2)
+	Genus <- lapply(strsplit(as.character(RawGSA8$Binomial), "\\ "), "[", 1)
+	species_list <- tibble(Genus, Species)
+	RawGSA8$phylo <- paste(species_list$Genus, species_list$Species,
+	                                sep = "_")
+	len(RawGSA8$Binomial)
+
+	sp_drop <- setdiff(fish_elasmo_supertree$tip.label, RawGSA8$phylo)
+
+	tree_pruned <- drop.tip(fish_elasmo_supertree, sp_drop)
+
+	len(tree_pruned$tip.label) # 32 species in the dataset that are on the tree
+
+	phylo_list <- tree_pruned$tip.label
+
+	len(unique(RawGSA8$Binomial))
+	sp_df <- unique(RawGSA8$Binomial)
+
+	# dataset of species with raw data (at least n = 8) and on phylogeny
+	RawGSA8_phylo <- filter(RawGSA8, phylo %in% phylo_list)
+
+	setdiff(RawGSA8_phylo$phylo, tree_pruned$tip.label)
+	setdiff(tree_pruned$tip.label, RawGSA8_phylo$phylo)
+
+	## create an object for later use in filtering out species that are traditionally used in
+	# aquaculture and those that are air-breathers
+
+	aquaculture_sp_mean <- c("Ameiurus nebulosus",
+	                          "Channa punctata",
+	                          "Channa striata",
+	                          "Cirrhinus mrigala",
+	                          "Clarias batrachus",
+	                          "Ctenopharyngodon idella",
+	                          "Oncorhynchus mykiss",
+	                          "Oreochromis niloticus",
+	                          "Salmo trutta",
+	                          "Tinca tinca")
+
+	# species capable of air-breathing
+	airbreathing_mean <- c("Anabas testudineus",
+	                       "Anguilla anguilla",
+	                       "Anguilla rostrata",
+	                       "Boleophthalmus boddarti",
+	                       "Channa punctata",
+	                       "Channa striata",
+	                       "Clarias batrachus",
+	                       "Cobitis taenia",
+	                       "Heteropneustes fossilis",
+	                       "Lipophrys pholis",
+	                       "Misgurnus fossilis",
+	                       "Periophthalmus barbarus",
+	                       "Periophthalmus chrysospilos",
+	                       "Taurulus bubalis",
+	                       "Zoarces viviparus")
+
+	### Pauly's data
+	Pauly_dat <- read.csv(here("data/Pauly_GAI_data.csv"), header = TRUE,
+	                      strip.white = TRUE, stringsAsFactors = FALSE) %>% na.omit()
+
+	Pauly_dat$LogGAI <- log10(Pauly_dat$GAI)
+
+	Pauly_dat_no_coel <- Pauly_dat %>%
+                    	 filter(., Species != "Latimeria chalumnae")
+	# remove coelacanth
+
+	Pauly_dat_no_coel$LogGAI <- log10(Pauly_dat_no_coel$GAI)
+
+	Pauly_sp <- unique(Pauly_dat$BinomialResolved)
+
+	OverlapPaulyNew <- GSA_growth_dat %>% filter(., Binomial %in% Pauly_sp)
+
+	overlap_sp <- unique(OverlapPaulyNew$Binomial) 
+
+	setdiff(Pauly_sp, overlap_sp)
+