Skip to content

Commit

Permalink
normalise
Browse files Browse the repository at this point in the history
Add `normalise_phenotype` function in tstrait.
  • Loading branch information
daikitag committed Feb 6, 2024
1 parent 0729682 commit 5053545
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,98 @@ def model_list(loc, scale):
return distr


class TestNormalise(Test):
"""
Test the `normalise_phenotypes` function by using the simulation codes
that are adapted from the ARG-Needle paper,
https://zenodo.org/records/7745746. The simulation framework assumes
that all sites are causal.
"""

def _sim_arg_needle(self, ts, h2, alpha, random_seed):
"""
Phenotype simulation framework that is adaped from the ARG-Needle paper.
"""

rng = np.random.default_rng(random_seed)
num_ind = ts.num_individuals
phenotypes = np.zeros(num_ind)

for variant in ts.variants():
row = variant.genotypes.astype("float64")
row = row.reshape((num_ind, 2)).sum(axis=-1)
std = np.std(row, ddof=1)
beta = rng.normal()
phenotypes += row * (beta * std**alpha)

# normalize pheno to have mean 0 and var = h2
phenotypes -= np.mean(phenotypes)
phenotypes /= np.std(phenotypes, ddof=1)
phenotypes *= np.sqrt(h2)

# sample environmental component with variance 1-h2 and add it to phenotype
phenotypes += rng.normal(size=num_ind) * np.sqrt(1 - h2)

# normalize it all
phenotypes -= np.mean(phenotypes)
phenotypes /= np.std(phenotypes, ddof=1)

return phenotypes

def _simulate_tstrait_normalise(self, ts, trait_model, h2, alpha, random_seed):
"""
Simulate phenotypes and normalise it assuming that all sites are causal.
"""
sim_result = tstrait.sim_phenotype(
ts=ts,
num_causal=ts.num_sites,
model=trait_model,
h2=h2,
alpha=alpha,
random_seed=random_seed,
)
phenotype_df = sim_result.phenotype
normalised_df = tstrait.normalise_phenotypes(phenotype_df)

return normalised_df["phenotype"].values

def test_compare_normalise(self):
"""
We are comparing with the infinite-sites simulation to be consistent
with the ARG-Needle simulation.
"""
ts = msprime.sim_ancestry(
samples=1000,
recombination_rate=1e-8,
sequence_length=100_000,
population_size=10_000,
random_seed=123456,
)
ts = msprime.sim_mutations(
ts, rate=1e-8, random_seed=1234, discrete_genome=False
)
model = tstrait.trait_model(distribution="normal", mean=0, var=1)
h2 = 0.3
random_seed = 1234
for alpha in [0, -1]:
tstrait_phenotype = self._simulate_tstrait_normalise(
ts=ts,
trait_model=model,
h2=h2,
alpha=alpha,
random_seed=random_seed + alpha,
)
arg_needle_phenotype = self._sim_arg_needle(
ts=ts, h2=h2, alpha=alpha, random_seed=random_seed + alpha
)
self._plot_qq_compare(
data1=tstrait_phenotype,
data1_name=f"tstrait_alpha={alpha}",
data2=arg_needle_phenotype,
data2_name=f"arg_needle_alpha={alpha}",
)


class EffectSizeDistribution(Test):
"""
Examine the statistical properties of simulated effect sizes.
Expand Down

0 comments on commit 5053545

Please sign in to comment.