Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dtwf/ped stats tests #2225

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 136 additions & 9 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -1658,26 +1658,32 @@ def run_dtwf_pedigree_comparison(
recombination_rate=0,
num_replicates=100,
pedigree_sim_direction="forward",
additional_nodes=None,
):
df = pd.DataFrame()

def replicates_data(replicates, model):
data = collections.defaultdict(list)
for ts in replicates:
t_mrca = np.zeros(ts.num_trees)
num_roots = np.zeros(ts.num_trees)
t_intervals = []
total_branch_length = np.zeros(ts.num_trees)
for tree in ts.trees():
t_mrca[tree.index] = max(tree.time(root) for root in tree.roots)
t_intervals.append(tree.interval)
num_roots[tree.index] = tree.num_roots
total_branch_length[tree.index] = tree.total_branch_length
data["num_roots"].append(np.mean(num_roots))
data["tmrca_mean"].append(np.mean(t_mrca))
data["num_trees"].append(ts.num_trees)
data["intervals"].append(t_intervals)
data["total_branch_length"].append(np.mean(total_branch_length))
data["model"].append(model)
return pd.DataFrame(data)

df_list = []
coalescing_segments_only = True
if additional_nodes is not None:
coalescing_segments_only = False
for _ in range(num_replicates):
pedigree = pedigrees.sim_pedigree(
population_size=N, end_time=end_time, direction=pedigree_sim_direction
Expand All @@ -1687,8 +1693,11 @@ def replicates_data(replicates, model):
initial_state=pedigree,
recombination_rate=recombination_rate,
model="fixed_pedigree",
additional_nodes=additional_nodes,
coalescing_segments_only=coalescing_segments_only,
)
df = df.append(replicates_data([ts_ped], "dtwf|ped"))

df_list.append(replicates_data([ts_ped], "dtwf|ped"))

dtwf_replicates = msprime.sim_ancestry(
samples=N,
Expand All @@ -1698,14 +1707,16 @@ def replicates_data(replicates, model):
sequence_length=sequence_length,
model="dtwf",
num_replicates=num_replicates,
additional_nodes=additional_nodes,
coalescing_segments_only=coalescing_segments_only,
)
df = df.append(replicates_data(dtwf_replicates, "dtwf"))
return df
df_list.append(replicates_data(dtwf_replicates, "dtwf"))
return pd.concat(df_list)

def plot_coalescent_stats(self, df):
df_ped = df[df.model == "dtwf|ped"]
df_dtwf = df[df.model == "dtwf"]
for stat in ["tmrca_mean", "num_trees", "num_roots"]:
for stat in ["tmrca_mean", "num_trees", "num_roots", "total_branch_length"]:
plot_qq(df_ped[stat], df_dtwf[stat])
pyplot.xlabel("dtwf|ped")
pyplot.ylabel("dtwf")
Expand All @@ -1724,6 +1735,14 @@ def _run(self, **kwargs):
df = self.run_dtwf_pedigree_comparison(**kwargs)
self.plot_coalescent_stats(df)


class DtwfVsPedigreeSimple(DtwfVsPedigree):
"""
Running a simulation through a pedigree with population size N
should be identical to running a DTWF simulation of the same
size.
"""

def test_dtwf_vs_pedigree_single_locus_n50(self):
self._run(N=50, end_time=500, num_replicates=100)

Expand Down Expand Up @@ -1772,6 +1791,44 @@ def test_dtwf_vs_pedigree_recomb_discrete_hotspots(self):
)


class DtwfVsPedigreeAdditionalNodes(DtwfVsPedigree):
"""
Running a simulation through a pedigree with population size N
should be identical to running a DTWF simulation of the same
size. Tests impact of registering additional nodes.
"""

def test_dtwf_vs_pedigree_many_roots_add_nodes(self):
additional_nodes = (
msprime.NodeType.RECOMBINANT
| msprime.NodeType.PASS_THROUGH
| msprime.NodeType.COMMON_ANCESTOR
)
self._run(
N=500,
end_time=100,
num_replicates=100,
sequence_length=100,
recombination_rate=0.0001,
additional_nodes=additional_nodes,
)

def test_dtwf_vs_pedigree_few_roots_add_nodes(self):
additional_nodes = (
msprime.NodeType.RECOMBINANT
| msprime.NodeType.PASS_THROUGH
| msprime.NodeType.COMMON_ANCESTOR
)
self._run(
N=10,
end_time=1000,
num_replicates=100,
sequence_length=100,
recombination_rate=0.0001,
additional_nodes=additional_nodes,
)


class DtwfVsRecapitatedPedigree(Test):
"""
Running a simulation through a pedigree with population size N
Expand Down Expand Up @@ -1912,7 +1969,7 @@ class DtwfVsCoalescent(Test):
"""

def run_dtwf_coalescent_stats(self, **kwargs):
df = pd.DataFrame()
df_list = []

for model in ["hudson", "dtwf"]:
kwargs["model"] = model
Expand All @@ -1930,8 +1987,8 @@ def run_dtwf_coalescent_stats(self, **kwargs):
data["num_trees"].append(ts.num_trees)
data["intervals"].append(t_intervals)
data["model"].append(model)
df = df.append(pd.DataFrame(data))
return df
df_list.append(pd.DataFrame(data))
return pd.concat(df_list)

def plot_dtwf_coalescent_stats(self, df):
df_hudson = df[df.model == "hudson"]
Expand Down Expand Up @@ -2232,6 +2289,76 @@ def test_dtwf_vs_coalescent_2_pops_high_asymm_mig(self):
)


class DtwfVsCoalescentAdditionalNodes(DtwfVsCoalescent):
"""
Comparison of DTWF with additional nodes against coalescent sims
without additional nodes.
"""

def run_dtwf_coalescent_stats(self, **kwargs):
df_list = []

for model in ["dtwf", "hudson"]:
kwargs["model"] = model
if model == "hudson":
kwargs["additional_nodes"] = None
kwargs["coalescing_segments_only"] = True
logging.debug(f"Running: {kwargs}")
data = collections.defaultdict(list)
replicates = msprime.sim_ancestry(**kwargs)
for ts in replicates:
tss = ts.simplify()
t_mrca = np.zeros(tss.num_trees)
t_intervals = []
for tree in tss.trees():
t_mrca[tree.index] = tree.time(tree.root)
t_intervals.append(tree.interval)
data["tmrca_mean"].append(np.mean(t_mrca))
data["num_trees"].append(tss.num_trees)
data["intervals"].append(t_intervals)
data["model"].append(model)
df_list.append(pd.DataFrame(data))
return pd.concat(df_list)

def test_dtwf_vs_coalescent_pass_through_only(self):
"""
Checks the DTWF against the standard coalescent while
registering all pass through nodes.
"""
additional_nodes = msprime.NodeType.PASS_THROUGH
self._run(
samples=10,
population_size=1000,
recombination_rate=1e-5,
num_replicates=300,
sequence_length=1000,
discrete_genome=True,
coalescing_segments_only=False,
additional_nodes=additional_nodes,
)

def test_dtwf_vs_coalescent_all_additional_nodes(self):
"""
Checks the DTWF against the standard coalescent while
registering all possible additional nodes.
"""
additional_nodes = (
msprime.NodeType.RECOMBINANT
| msprime.NodeType.PASS_THROUGH
| msprime.NodeType.COMMON_ANCESTOR
)
self._run(
samples=10,
population_size=1000,
recombination_rate=1e-5,
num_replicates=300,
sequence_length=1000,
discrete_genome=True,
coalescing_segments_only=False,
additional_nodes=additional_nodes,
)


class DtwfVsSlim(Test):
"""
Tests where we compare the DTWF with SLiM simulations.
Expand Down
Loading