Skip to content

Commit

Permalink
dtwf/ped stats tests
Browse files Browse the repository at this point in the history
  • Loading branch information
GertjanBisschop committed Dec 5, 2023
1 parent 873c2cc commit 9ebb2ea
Showing 1 changed file with 136 additions and 9 deletions.
145 changes: 136 additions & 9 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -1658,26 +1658,32 @@ def run_dtwf_pedigree_comparison(
recombination_rate=0,
num_replicates=100,
pedigree_sim_direction="forward",
additional_nodes=None,
):
df = pd.DataFrame()

def replicates_data(replicates, model):
data = collections.defaultdict(list)
for ts in replicates:
t_mrca = np.zeros(ts.num_trees)
num_roots = np.zeros(ts.num_trees)
t_intervals = []
total_branch_length = np.zeros(ts.num_trees)
for tree in ts.trees():
t_mrca[tree.index] = max(tree.time(root) for root in tree.roots)
t_intervals.append(tree.interval)
num_roots[tree.index] = tree.num_roots
total_branch_length[tree.index] = tree.total_branch_length
data["num_roots"].append(np.mean(num_roots))
data["tmrca_mean"].append(np.mean(t_mrca))
data["num_trees"].append(ts.num_trees)
data["intervals"].append(t_intervals)
data["total_branch_length"].append(np.mean(total_branch_length))
data["model"].append(model)
return pd.DataFrame(data)

df_list = []
coalescing_segments_only = True
if additional_nodes is not None:
coalescing_segments_only = False
for _ in range(num_replicates):
pedigree = pedigrees.sim_pedigree(
population_size=N, end_time=end_time, direction=pedigree_sim_direction
Expand All @@ -1687,8 +1693,11 @@ def replicates_data(replicates, model):
initial_state=pedigree,
recombination_rate=recombination_rate,
model="fixed_pedigree",
additional_nodes=additional_nodes,
coalescing_segments_only=coalescing_segments_only,
)
df = df.append(replicates_data([ts_ped], "dtwf|ped"))

df_list.append(replicates_data([ts_ped], "dtwf|ped"))

dtwf_replicates = msprime.sim_ancestry(
samples=N,
Expand All @@ -1698,14 +1707,16 @@ def replicates_data(replicates, model):
sequence_length=sequence_length,
model="dtwf",
num_replicates=num_replicates,
additional_nodes=additional_nodes,
coalescing_segments_only=coalescing_segments_only,
)
df = df.append(replicates_data(dtwf_replicates, "dtwf"))
return df
df_list.append(replicates_data(dtwf_replicates, "dtwf"))
return pd.concat(df_list)

def plot_coalescent_stats(self, df):
df_ped = df[df.model == "dtwf|ped"]
df_dtwf = df[df.model == "dtwf"]
for stat in ["tmrca_mean", "num_trees", "num_roots"]:
for stat in ["tmrca_mean", "num_trees", "num_roots", "total_branch_length"]:
plot_qq(df_ped[stat], df_dtwf[stat])
pyplot.xlabel("dtwf|ped")
pyplot.ylabel("dtwf")
Expand All @@ -1724,6 +1735,14 @@ def _run(self, **kwargs):
df = self.run_dtwf_pedigree_comparison(**kwargs)
self.plot_coalescent_stats(df)


class DtwfVsPedigreeSimple(DtwfVsPedigree):
"""
Running a simulation through a pedigree with population size N
should be identical to running a DTWF simulation of the same
size.
"""

def test_dtwf_vs_pedigree_single_locus_n50(self):
self._run(N=50, end_time=500, num_replicates=100)

Expand Down Expand Up @@ -1772,6 +1791,44 @@ def test_dtwf_vs_pedigree_recomb_discrete_hotspots(self):
)


class DtwfVsPedigreeAdditionalNodes(DtwfVsPedigree):
"""
Running a simulation through a pedigree with population size N
should be identical to running a DTWF simulation of the same
size. Tests impact of registering additional nodes.
"""

def test_dtwf_vs_pedigree_many_roots_add_nodes(self):
additional_nodes = (
msprime.NodeType.RECOMBINANT
| msprime.NodeType.PASS_THROUGH
| msprime.NodeType.COMMON_ANCESTOR
)
self._run(
N=500,
end_time=100,
num_replicates=100,
sequence_length=100,
recombination_rate=0.0001,
additional_nodes=additional_nodes,
)

def test_dtwf_vs_pedigree_few_roots_add_nodes(self):
additional_nodes = (
msprime.NodeType.RECOMBINANT
| msprime.NodeType.PASS_THROUGH
| msprime.NodeType.COMMON_ANCESTOR
)
self._run(
N=10,
end_time=1000,
num_replicates=100,
sequence_length=100,
recombination_rate=0.0001,
additional_nodes=additional_nodes,
)


class DtwfVsRecapitatedPedigree(Test):
"""
Running a simulation through a pedigree with population size N
Expand Down Expand Up @@ -1912,7 +1969,7 @@ class DtwfVsCoalescent(Test):
"""

def run_dtwf_coalescent_stats(self, **kwargs):
df = pd.DataFrame()
df_list = []

for model in ["hudson", "dtwf"]:
kwargs["model"] = model
Expand All @@ -1930,8 +1987,8 @@ def run_dtwf_coalescent_stats(self, **kwargs):
data["num_trees"].append(ts.num_trees)
data["intervals"].append(t_intervals)
data["model"].append(model)
df = df.append(pd.DataFrame(data))
return df
df_list.append(pd.DataFrame(data))
return pd.concat(df_list)

def plot_dtwf_coalescent_stats(self, df):
df_hudson = df[df.model == "hudson"]
Expand Down Expand Up @@ -2232,6 +2289,76 @@ def test_dtwf_vs_coalescent_2_pops_high_asymm_mig(self):
)


class DtwfVsCoalescentAdditionalNodes(DtwfVsCoalescent):
"""
Comparison of DTWF with additional nodes against coalescent sims
without additional nodes.
"""

def run_dtwf_coalescent_stats(self, **kwargs):
df_list = []

for model in ["dtwf", "hudson"]:
kwargs["model"] = model
if model == "hudson":
kwargs["additional_nodes"] = None
kwargs["coalescing_segments_only"] = True
logging.debug(f"Running: {kwargs}")
data = collections.defaultdict(list)
replicates = msprime.sim_ancestry(**kwargs)
for ts in replicates:
tss = ts.simplify()
t_mrca = np.zeros(tss.num_trees)
t_intervals = []
for tree in tss.trees():
t_mrca[tree.index] = tree.time(tree.root)
t_intervals.append(tree.interval)
data["tmrca_mean"].append(np.mean(t_mrca))
data["num_trees"].append(tss.num_trees)
data["intervals"].append(t_intervals)
data["model"].append(model)
df_list.append(pd.DataFrame(data))
return pd.concat(df_list)

def test_dtwf_vs_coalescent_pass_through_only(self):
"""
Checks the DTWF against the standard coalescent while
registering all pass through nodes.
"""
additional_nodes = msprime.NodeType.PASS_THROUGH
self._run(
samples=10,
population_size=1000,
recombination_rate=1e-5,
num_replicates=300,
sequence_length=1000,
discrete_genome=True,
coalescing_segments_only=False,
additional_nodes=additional_nodes,
)

def test_dtwf_vs_coalescent_all_additional_nodes(self):
"""
Checks the DTWF against the standard coalescent while
registering all possible additional nodes.
"""
additional_nodes = (
msprime.NodeType.RECOMBINANT
| msprime.NodeType.PASS_THROUGH
| msprime.NodeType.COMMON_ANCESTOR
)
self._run(
samples=10,
population_size=1000,
recombination_rate=1e-5,
num_replicates=300,
sequence_length=1000,
discrete_genome=True,
coalescing_segments_only=False,
additional_nodes=additional_nodes,
)


class DtwfVsSlim(Test):
"""
Tests where we compare the DTWF with SLiM simulations.
Expand Down

0 comments on commit 9ebb2ea

Please sign in to comment.