Skip to content

Commit

Permalink
CODE: signature
Browse files Browse the repository at this point in the history
Update signature of  `sim_trait`, `sim_env`, and `sim_phenotype`.
  • Loading branch information
daikitag authored and mergify[bot] committed Nov 22, 2023
1 parent 29be232 commit 913ccb1
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 17 deletions.
73 changes: 72 additions & 1 deletion tests/test_simulate_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_bad_input_h2(self, sample_df, sample_two_trait_df):
with pytest.raises(
ValueError, match="Length of h2 must match the number of traits"
):
tstrait.sim_env(genetic_df=sample_two_trait_df, h2=0.3)
tstrait.sim_env(genetic_df=sample_two_trait_df, h2=[0.3])

with pytest.raises(
ValueError, match="Narrow-sense heritability must be 0 < h2 <= 1"
Expand Down Expand Up @@ -198,6 +198,19 @@ def test_h2_one(self, sample_df, sample_two_trait_df):
two_trait_df["environmental_noise"], np.zeros(len(two_trait_df))
)

def test_h2_one_default(self, sample_df, sample_two_trait_df):
single_df = tstrait.sim_env(genetic_df=sample_df)
self.check_dimensions(single_df, len(sample_df))
np.testing.assert_equal(
single_df["environmental_noise"], np.zeros(len(single_df))
)

two_trait_df = tstrait.sim_env(genetic_df=sample_two_trait_df)
self.check_dimensions(two_trait_df, len(sample_two_trait_df))
np.testing.assert_equal(
two_trait_df["environmental_noise"], np.zeros(len(two_trait_df))
)


class Test_KSTest:
"""Test the distribution of environmental noise by using a Kolmogorov-Smirnov test"""
Expand Down Expand Up @@ -299,3 +312,61 @@ def test_env_multiple(self):
"norm",
(0, sd),
)

def test_env_multiple_single_h2(self):
num_ind = 10_000
num_causal = 1_000

np.random.seed(10)
n = 3
mean = np.random.randn(n)
M = np.random.randn(n, n)
cov = np.dot(M, M.T)

h2 = 0.3
ts = msprime.sim_ancestry(num_ind, sequence_length=100_000, random_seed=1)
ts = msprime.sim_mutations(ts, rate=0.01, random_seed=1)

model_multiple = tstrait.trait_model(
distribution="multi_normal", mean=mean, cov=cov
)

trait_df = tstrait.sim_trait(
ts=ts, num_causal=num_causal, model=model_multiple, random_seed=11
)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
phenotype_df = tstrait.sim_env(genetic_df=genetic_result, h2=h2, random_seed=10)
phenotype_df1 = tstrait.sim_env(
genetic_df=genetic_result, h2=h2, random_seed=20
)

phenotype_df = pd.concat([phenotype_df, phenotype_df1])

phenotype_df = phenotype_df.reset_index()
del phenotype_df["index"]

const = np.random.randn(n)
sd_array = np.zeros(3)

sum_data = np.zeros(2 * num_ind)

h2_array = np.ones(n) * h2
for i in range(n):
df = phenotype_df.loc[phenotype_df.trait_id == i]
var = np.var(df["genetic_value"])
sd = np.sqrt((1 - h2_array[i]) / h2_array[i] * var)
self.check_distribution(
df["environmental_noise"],
"norm",
(0, sd),
)
sum_data += df["environmental_noise"] * const[i]
sd_array[i] = sd

sd = np.dot(sd_array, const)

self.check_distribution(
sum_data,
"norm",
(0, sd),
)
2 changes: 1 addition & 1 deletion tests/test_simulate_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_bad_input_h2(self, sample_ts, sample_trait_model):
ts=sample_ts, num_causal=5, model=trait_model, h2=[0.1, 0]
)

@pytest.mark.parametrize("h2", [0.1, [0.2, 0.3, 0.4]])
@pytest.mark.parametrize("h2", [[0.1], [0.2, 0.3, 0.4]])
def test_h2_dim(self, sample_ts, h2):
with pytest.raises(
ValueError, match="Length of h2 must match the number of traits"
Expand Down
9 changes: 5 additions & 4 deletions tstrait/simulate_effect_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def _run(self):
return trait_df


def sim_trait(ts, model, *, num_causal=None, alpha=0, random_seed=None):
def sim_trait(ts, model, *, num_causal=None, alpha=None, random_seed=None):
"""
Randomly selects causal sites and the corresponding causal allele, and simulates
effect sizes for each of the chosen causal site.
Expand All @@ -189,12 +189,12 @@ def sim_trait(ts, model, *, num_causal=None, alpha=0, random_seed=None):
simulation.
model : tstrait.TraitModel
Trait model that will be used to simulate effect sizes.
num_causal : int, default 1
num_causal : int, default None
Number of causal sites. If None, number of causal sites will be 1.
alpha : float, default 0
alpha : float, default None
Parameter that determines the degree of the frequency dependence model. Please
see :ref:`frequency_dependence` for details on how this parameter influences
effect size simulation.
effect size simulation. If None, alpha will be 0.
random_seed : int, default None
Random seed of simulation. If None, simulation will be conducted randomly.
Expand Down Expand Up @@ -235,6 +235,7 @@ def sim_trait(ts, model, *, num_causal=None, alpha=0, random_seed=None):
model = _check_instance(model, "model", tstrait.TraitModel)
num_causal = 1 if num_causal is None else num_causal
num_causal = _check_int(num_causal, "num_causal", minimum=1)
alpha = 0 if alpha is None else alpha
alpha = _check_val(alpha, "alpha")
num_sites = ts.num_sites
if num_sites == 0:
Expand Down
12 changes: 7 additions & 5 deletions tstrait/simulate_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,18 @@ def _sim_environment(self):
return df


def sim_env(genetic_df, h2, random_seed=None):
def sim_env(genetic_df, *, h2=None, random_seed=None):
"""
Simulates environmental noise.
Parameters
----------
genetic_df : pandas.DataFrame
Genetic value dataframe.
h2 : float or array-like
Narrow-sense heritability. When it is 0, environmental noise will be a vector of
zeros. The dimension of `h2` must match the number of traits to be simulated.
h2 : float or array-like, default None.
Narrow-sense heritability. When it is 1, environmental noise will be a vector of
zeros. If `h2` is array-like, the dimension of `h2` must match the number of
traits to be simulated. If None, h2 will be 1.
random_seed : int, default None
Random seed of simulation. If None, simulation will be conducted randomly.
Expand Down Expand Up @@ -116,8 +117,9 @@ def sim_env(genetic_df, h2, random_seed=None):
if np.min(trait_id) != 0 or np.max(trait_id) != len(trait_id) - 1:
raise ValueError("trait_id must be consecutive and start from 0")

h2 = 1 if h2 is None else h2
if isinstance(h2, numbers.Real):
h2 = np.array([h2])
h2 = np.ones(len(trait_id)) * h2

if len(h2) != len(trait_id):
raise ValueError("Length of h2 must match the number of traits")
Expand Down
13 changes: 7 additions & 6 deletions tstrait/simulate_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class PhenotypeResult:
phenotype: pd.DataFrame


def sim_phenotype(ts, model, h2, *, num_causal=None, alpha=0, random_seed=None):
def sim_phenotype(ts, model, *, num_causal=None, alpha=None, h2=None, random_seed=None):
"""
Simulate quantitative traits.
Expand All @@ -42,15 +42,16 @@ def sim_phenotype(ts, model, h2, *, num_causal=None, alpha=0, random_seed=None):
simulation.
model : tstrait.TraitModel
Trait model that will be used to simulate effect sizes.
h2 : float or array-like
Narrow-sense heritability. When it is 0, environmental noise will be a vector of
zeros. The dimension of `h2` must match the number of traits to be simulated.
num_causal : int, default None
Number of causal sites. If None, number of causal sites will be 1.
alpha : float, default 0
alpha : float, default None
Parameter that determines the degree of the frequency dependence model. Please
see :ref:`frequency_dependence` for details on how this parameter influences
effect size simulation.
effect size simulation. If None, alpha will be 0.
h2 : float or array-like, default None.
Narrow-sense heritability. When it is 1, environmental noise will be a vector of
zeros. If `h2` is array-like, the dimension of `h2` must match the number of
traits to be simulated. If None, h2 will be 1.
random_seed : int, default None
Random seed of simulation. If None, simulation will be conducted randomly.
Expand Down

0 comments on commit 913ccb1

Please sign in to comment.