Bayesian Analysis of Macroevolutionary Mixtures

Author: Léo-Paul Dagallier
Last update: 2023-08-09

Linux installation

cd <path to program directory>
git clone https://github.com/macroevolution/bamm.git
cd bamm
mkdir build
cd build
cmake ..
make -j
sudo make install

BAMM analysis

Input data

Prepare the path variables:

in bash:

path_to_data="data";
path_to_output="outputs";
cd $path_to_output
mkdir BAMM

in R:

path_to_tree = c("data/name_MCC_monodoreae3_monod_pruned.newick")
path_to_output = c("outputs/BAMM/")
path_to_figures = c("figures/")
data_suffix <- "monodoreae3"

Check the prerequesities of the tree

The tree needs to be ultrametric, fully bifurcating (no polytomy) and minimum branch length should be > 0.

library(ape)
v <- read.tree(path_to_tree)
is.ultrametric(v)

## [1] TRUE

is.binary(v)

## [1] TRUE

# Now to check min branch length:
min(v$edge.length)

## [1] 0.1063518

Prepare the configuration file

Prepare manually the configuration file in the data/BAMM directory.

Prepare using BAMMtools

library(BAMMtools)
setBAMMpriors(v, outfile = paste0(path_to_output, "myPriors.txt"))

## 
## Prior block written to file << outputs/BAMM/myPriors.txt >>
## Copy and paste the contents of the file into the
## priors block of your BAMM input file
## 
## This function simply sets the expectedNumberOfShifts to 1;
## This is a parameter you may need to vary to achieve good convergence
## with your data.

Copy the values from myPriors.txt into the bamm_config_FINAL file.

Run the analysis

In bash (not R).

cd $path_to_output/BAMM
bamm -c $path_to_data/bamm_config_FINAL

Assess MCMC convergence

Compute effective sample size after discarding the burnin:

mcmcout <- read.csv(paste0(path_to_output, "mcmc_out.txt"), header=T)
burnstart <- floor(0.1 * nrow(mcmcout))
postburn <- mcmcout[burnstart:nrow(mcmcout), ]

library(coda)
paste0("ESS for the log Likelihood: ", effectiveSize(postburn$logLik))

## [1] "ESS for the log Likelihood: 901.000000000001"

paste0("ESS for the log Prior: ",effectiveSize(postburn$logPrior))

## [1] "ESS for the log Prior: 901"

paste0("ESS for the number of shifts: ",effectiveSize(postburn$N_shifts))

## [1] "ESS for the number of shifts: 708.541052069349"

Visual assessment of MCMC convergence of the BAMM run:

par(mfrow=c(2,2))
plot(mcmcout$logLik, type="l", main="a)", xlab="Generations",ylab="log Likelihood")
plot(mcmcout$N_shifts ~ mcmcout$generation, main="b)", xlab="Generations",ylab="Number of shifts")
plot(postburn$logLik, type="l", main="c)", xlab="Generations",ylab="log Likelihood")
plot(postburn$N_shifts ~ postburn$generation, main="d)", xlab="Generations",ylab="Number of shifts")

Save the plot in PDF:

pdf(paste0(path_to_figures, "BAMM - 1. Assessing MCMC convergence of the BAMM run - ", data_suffix, ".pdf"))
par(mfrow=c(2,2))
plot(mcmcout$logLik, type="l", main="a)", xlab="Generations",ylab="log Likelihood")
plot(mcmcout$N_shifts ~ mcmcout$generation, main="b)", xlab="Generations",ylab="Number of shifts")
plot(postburn$logLik, type="l", main="c)", xlab="Generations",ylab="log Likelihood")
plot(postburn$N_shifts ~ postburn$generation, main="d)", xlab="Generations",ylab="Number of shifts")
dev.off()

## png 
##   2

Getting BAMM results

library(BAMMtools)
tree <- read.tree(path_to_tree)
edata <- getEventData(tree, eventdata = paste0(path_to_output, "event_data.txt"), burnin=0.1)

## Reading event datafile:  outputs/BAMM/event_data.txt 
##      ...........
## Read a total of 200 samples from posterior
## 
## Discarded as burnin: GENERATIONS <  950000
## Analyzing  181  samples from posterior
## 
## Setting recursive sequence on tree...
## 
## Done with recursive sequence

Analysis of rate shifts

How many rate shifts

Compute the posterior probabilities of the models sampled using BAMM:

post_probs <- table(postburn$N_shifts) / nrow(postburn)
post_probs

## 
##           0           1           2           3 
## 0.847946726 0.119866815 0.022197558 0.009988901

shift_probs <- summary(edata)

## 
## Analyzed 181 posterior samples
## Shift posterior distribution:
## 
##          0     0.8400
##          1     0.1300
##          2     0.0220
##          3     0.0055
## 
## Compute credible set of shift configurations for more information:
##  See ?credibleShiftSet and ?getBestShiftConfiguration

Posterior probability of the expected number of rate shifts.

par(mfrow=c(1,1))
plot(post_probs, xlim=c(0,8), main="", ylab="Posterior", xlab="Number of shifts")

Save the plot in PDF:

pdf(paste0(path_to_figures, "BAMM - 2. Posterior for number of shifts - ", data_suffix, ".pdf"))
plot(post_probs, xlim=c(0,8), main="", ylab="Posterior", xlab="Number of shifts")
dev.off()

## png 
##   2

Compute Bayes Factor

bfmat <- computeBayesFactors(mcmcout, expectedNumberOfShifts=1, burnin=0.1)
bfmat[,1]

##          0          1          2          3 
## 1.00000000 0.28272251 0.10471204 0.09424084

Here it means that the models with 1, 2 or 3 shifts are not better than the null model (the model with 0 shift).

Mean Phylo rate plot

Speciation rate through time:

# plot.bammdata(edata, tau=0.001, spex="s", legend=T, lwd=1.5, logcolor = F,  breaksmethod = "linear")
ratesHistogram(plot.bammdata(edata, tau=0.001, spex="s", legend=T, lwd=1.5), plotBrks = TRUE, xlab = "Speciation rate")

axis(side = 1, at = c(0, 0.17, 0.175, 0.180, 0.185, 0.190, 0.195, 0.20, 0.205, 0.21, 0.215, 0.3),  cex.axis = 0.75, tcl = NA, mgp = c(0,  0.25, 0))

Extinction rate through time:

ratesHistogram(plot.bammdata(edata, tau=0.001, spex="e", legend=T, lwd=1.5), plotBrks = TRUE, xlab = "Extinction rate")

Net diversification rate through time:

ratesHistogram(plot.bammdata(edata, tau=0.001, spex="netdiv", legend=T, lwd=1.5), plotBrks = TRUE, xlab = "Net diversification rate")

axis(side = 1, at = c(0, 0.15, 0.155, 0.16, 0.165, 0.17, 0.175, 0.180),  cex.axis = 0.75, tcl = NA, mgp = c(0,  0.25, 0))

Save the plots in PDF:

pdf(paste0(path_to_figures, "BAMM - 3. Speciation rate through time of Monodoreae - ", data_suffix, ".pdf"))
plot.bammdata(edata, tau=0.001, spex="s", legend=T, lwd=2, labels = T, cex = 0.45)
dev.off()

## png 
##   2

pdf(paste0(path_to_figures, "BAMM - 4. Extinction rate through time of Monodoreae - ", data_suffix, ".pdf"))
plot.bammdata(edata, tau=0.001, spex="e", legend=T, lwd=2, labels = T, cex = 0.45)
dev.off()

## png 
##   2

pdf(paste0(path_to_figures, "BAMM - 5. Net diversification rate through time of Monodoreae - ", data_suffix, ".pdf"))
plot.bammdata(edata, tau=0.001, spex="netdiv", legend=T, lwd=2, labels = T, cex = 0.45)
dev.off()

## png 
##   2

Bayesian credible sets of shift configurations

css <- credibleShiftSet(edata, expectedNumberOfShifts=1, threshold=5, set.limit = 0.95)
css$number.distinct

## [1] 1

summary(css)

## 
##  95 % credible set of rate shift configurations sampled with BAMM
## 
## Distinct shift configurations in credible set:  1
## 
## Frequency of 1 shift configurations with highest posterior probability:
## 
## 
##    rank     probability cumulative  Core_shifts
##          1  0.9834254  0.9834254          0

# plot.credibleshiftset(css, spex="netdiv", BFcriterion=5, border=F, pal="temperature")
# plot.credibleshiftset(css, spex="s", BFcriterion=5, border=F, pal="temperature")
# plot.credibleshiftset(css, spex="e", BFcriterion=5, border=F, pal="temperature")

Finding the single best shift configuration

best <- getBestShiftConfiguration(edata, expectedNumberOfShifts=1)

## Processing event data from data.frame
## 
## Discarded as burnin: GENERATIONS <  0
## Analyzing  1  samples from posterior
## 
## Setting recursive sequence on tree...
## 
## Done with recursive sequence

{plot.bammdata(best, lwd = 2)
addBAMMshifts(best, cex=2.5)}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

BAMM.md

BAMM.md

Bayesian Analysis of Macroevolutionary Mixtures

Linux installation

BAMM analysis

Input data

Check the prerequesities of the tree

Prepare the configuration file

Prepare using BAMMtools

Run the analysis

Assess MCMC convergence

Getting BAMM results

Analysis of rate shifts

How many rate shifts

Compute Bayes Factor

Mean Phylo rate plot

Bayesian credible sets of shift configurations

Finding the single best shift configuration

Files

BAMM.md

Latest commit

History

BAMM.md

File metadata and controls

Bayesian Analysis of Macroevolutionary Mixtures

Linux installation

BAMM analysis

Input data

Check the prerequesities of the tree

Prepare the configuration file

Prepare using BAMMtools

Run the analysis

Assess MCMC convergence

Getting BAMM results

Analysis of rate shifts

How many rate shifts

Compute Bayes Factor

Mean Phylo rate plot

Bayesian credible sets of shift configurations

Finding the single best shift configuration