Skip to content

Commit

Permalink
CODE/DOC: Centre
Browse files Browse the repository at this point in the history
Add the argument `centre` in `genetic_value` function to modify the numericalisation of effect sizes.
  • Loading branch information
daikitag committed Feb 6, 2024
1 parent 5053545 commit 8a782ad
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 59 deletions.
6 changes: 4 additions & 2 deletions docs/effect-size.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ y = X\beta+\epsilon,
```

where $X$ is the matrix that describes the number of causal alleles in each individual, $\beta$
is the vector of effect sizes, and $\epsilon$ is the vector of environmental noise. Environmental
noise is simulated from the following distribution,
is the vector of effect sizes, and $\epsilon$ is the vector of environmental noise.
By default, the genotypes are numericalised as $(AA=1, Aa=0, aa=-1)$, where $A$ denotes the
causal allele, and it can also be numericalised as $(AA=2, Aa=1, aa=0)$ by modifying the
`centre` input. Environmental noise is simulated from the following distribution,

```{math}
\epsilon\sim N\left(0,V_G\cdot\frac{1-h^2}{h^2} \right),
Expand Down
110 changes: 64 additions & 46 deletions tests/test_genetic_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@ def sample_df():
return df


@pytest.fixture(scope="class")
def sample_trait_model():
return tstrait.trait_model(distribution="normal", mean=0, var=1)


class TestInput:
"""This test will check that an informative error is raised when the input parameter
does not have an appropriate type or value.
Expand Down Expand Up @@ -94,6 +89,13 @@ def test_no_individual(self, sample_df):
):
tstrait.genetic_value(ts=ts, trait_df=df)

def test_centre(self, sample_ts, sample_df):
with pytest.raises(
TypeError,
match="centre must be a <class 'bool'> instance",
):
tstrait.genetic_value(ts=sample_ts, trait_df=sample_df, centre="centre")


class TestOutputDim:
"""Check that the genetic_value function gives the output with correct dimensions"""
Expand Down Expand Up @@ -166,14 +168,27 @@ def test_additional_row(self, sample_ts, sample_df):
df["height"] = [170, 180]
tstrait.genetic_value(ts=sample_ts, trait_df=df)

def test_default_centre(self, sample_ts, sample_df):
"""
Check that the default centre is True
"""
genetic_df_default = tstrait.genetic_value(ts=sample_ts, trait_df=sample_df)
genetic_df_True = tstrait.genetic_value(
ts=sample_ts, trait_df=sample_df, centre=True
)
pd.testing.assert_frame_equal(
genetic_df_default, genetic_df_True, check_dtype=False
)


class TestGenotype:
"""Test the `_individual_genetic_values` and `_obtain_allele_count` method and
check if they can accurately detect the individual genotype. Afterwards, we will
check the output of `genetic_value`.
"""

def test_binary_tree(self):
@pytest.mark.parametrize("centre", [True, False])
def test_binary_tree(self, centre):
trait_df = pd.DataFrame(
{
"site_id": [0, 2, 3],
Expand All @@ -182,33 +197,32 @@ def test_binary_tree(self):
"causal_allele": ["T", "C", "T"],
}
)

ts = binary_tree()
tree = ts.first()
genetic = _GeneticValue(ts, trait_df)
genetic = _GeneticValue(ts, trait_df, centre=centre)
g0 = genetic._individual_genetic_values(tree, ts.site(0), "T", 1)
g1 = genetic._individual_genetic_values(tree, ts.site(1), "T", 2)
g2 = genetic._individual_genetic_values(tree, ts.site(2), "C", 3)
g3 = genetic._individual_genetic_values(tree, ts.site(3), "C", 4)

np.testing.assert_equal(g0, np.array([1, 0, 2]) * 1)
np.testing.assert_equal(g1, np.array([1, 1, 0]) * 2)
np.testing.assert_equal(g2, np.array([0, 1, 0]) * 3)
np.testing.assert_equal(g3, np.array([1, 2, 0]) * 4)

genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
np.testing.assert_equal(g0, np.array([1, 0, 2] - np.ones(3) * centre) * 1)
np.testing.assert_equal(g1, np.array([1, 1, 0] - np.ones(3) * centre) * 2)
np.testing.assert_equal(g2, np.array([0, 1, 0] - np.ones(3) * centre) * 3)
np.testing.assert_equal(g3, np.array([1, 2, 0] - np.ones(3) * centre) * 4)

genetic_df = pd.DataFrame(
{
"trait_id": [0, 0, 0],
"individual_id": [0, 1, 2],
"genetic_value": [101, 10, 202],
"genetic_value": ([101, 10, 202] - np.array([111, 111, 111]) * centre),
}
)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)

pd.testing.assert_frame_equal(genetic_df, genetic_result, check_dtype=False)

def test_diff_ind_tree(self):
@pytest.mark.parametrize("centre", [True, False])
def test_diff_ind_tree(self, centre):
trait_df = pd.DataFrame(
{
"site_id": [0, 2],
Expand All @@ -217,32 +231,32 @@ def test_diff_ind_tree(self):
"causal_allele": ["T", "C"],
}
)

ts = diff_ind_tree()
tree = ts.first()
genetic = _GeneticValue(ts, trait_df)
genetic = _GeneticValue(ts, trait_df, centre=centre)
g0 = genetic._individual_genetic_values(tree, ts.site(0), "T", 1)
g1 = genetic._individual_genetic_values(tree, ts.site(1), "T", 2)
g2 = genetic._individual_genetic_values(tree, ts.site(2), "C", 3)
g3 = genetic._individual_genetic_values(tree, ts.site(3), "C", 4)

np.testing.assert_equal(g0, np.array([1, 1, 1]) * 1)
np.testing.assert_equal(g1, np.array([1, 0, 1]) * 2)
np.testing.assert_equal(g2, np.array([0, 1, 0]) * 3)
np.testing.assert_equal(g3, np.array([1, 1, 1]) * 4)
np.testing.assert_equal(g0, np.array([1, 1, 1] - np.ones(3) * centre))
np.testing.assert_equal(g1, np.array([1, 0, 1] - np.ones(3) * centre) * 2)
np.testing.assert_equal(g2, np.array([0, 1, 0] - np.ones(3) * centre) * 3)
np.testing.assert_equal(g3, np.array([1, 1, 1] - np.ones(3) * centre) * 4)

genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)
genetic_df = pd.DataFrame(
{
"trait_id": [0, 0, 0],
"individual_id": [0, 1, 2],
"genetic_value": [1, 11, 1],
"genetic_value": [1, 11, 1] - np.array([11, 11, 11]) * centre,
}
)

pd.testing.assert_frame_equal(genetic_df, genetic_result, check_dtype=False)

def test_non_binary_tree(self):
@pytest.mark.parametrize("centre", [True, False])
def test_non_binary_tree(self, centre):
trait_df = pd.DataFrame(
{
"site_id": [0, 1],
Expand All @@ -254,26 +268,28 @@ def test_non_binary_tree(self):

ts = non_binary_tree()
tree = ts.first()
genetic = _GeneticValue(ts, trait_df)

genetic = _GeneticValue(ts, trait_df, centre=centre)
g0 = genetic._individual_genetic_values(tree, ts.site(0), "T", 1)
g1 = genetic._individual_genetic_values(tree, ts.site(1), "C", 2)

np.testing.assert_equal(g0, np.array([0, 1, 2]) * 1)
np.testing.assert_equal(g1, np.array([0, 1, 1]) * 2)
np.testing.assert_equal(g0, np.array([0, 1, 2] - np.ones(3) * centre) * 1)
np.testing.assert_equal(g1, np.array([0, 1, 1] - np.ones(3) * centre) * 2)

genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)

genetic_df = pd.DataFrame(
{
"trait_id": [0, 0, 0],
"individual_id": [0, 1, 2],
"genetic_value": [2, 1, 10],
"genetic_value": [2, 1, 10] - np.array([11, 11, 11]) * centre,
}
)

pd.testing.assert_frame_equal(genetic_df, genetic_result, check_dtype=False)

def test_triploid(self, sample_df):
@pytest.mark.parametrize("centre", [True, False])
def test_triploid(self, sample_df, centre):
trait_df = pd.DataFrame(
{
"site_id": [0, 1],
Expand All @@ -285,24 +301,25 @@ def test_triploid(self, sample_df):

ts = triploid_tree()
tree = ts.first()
genetic = _GeneticValue(ts, sample_df)
genetic = _GeneticValue(ts, sample_df, centre=centre)
g0 = genetic._individual_genetic_values(tree, ts.site(0), "T", 1)
g1 = genetic._individual_genetic_values(tree, ts.site(1), "C", 2)

np.testing.assert_equal(g0, np.array([1, 2]) * 1)
np.testing.assert_equal(g1, np.array([1, 1]) * 2)
np.testing.assert_equal(g0, np.array([1, 2] - np.ones(2) * centre) * 1)
np.testing.assert_equal(g1, np.array([1, 1] - np.ones(2) * centre) * 2)

genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)
genetic_df = pd.DataFrame(
{
"trait_id": [0, 0],
"individual_id": [0, 1],
"genetic_value": [11, 12],
"genetic_value": [11, 12] - np.array([11, 11]) * centre,
}
)
pd.testing.assert_frame_equal(genetic_df, genetic_result, check_dtype=False)

def test_allele_freq_one(self):
@pytest.mark.parametrize("centre", [True, False])
def test_allele_freq_one(self, centre):
ts = binary_tree()
tables = ts.dump_tables()
tables.sites.add_row(4, "A")
Expand All @@ -317,12 +334,12 @@ def test_allele_freq_one(self):
"causal_allele": ["A"],
}
)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)
genetic_df = pd.DataFrame(
{
"trait_id": np.zeros(3),
"individual_id": np.arange(3),
"genetic_value": np.ones(3) * 2,
"genetic_value": np.ones(3) * 2 - np.ones(3) * centre,
}
)
pd.testing.assert_frame_equal(genetic_df, genetic_result, check_dtype=False)
Expand All @@ -331,7 +348,8 @@ def test_allele_freq_one(self):
class TestTreeSeq:
"""Test the `genetic_value` function by using a tree sequence data."""

def test_tree_seq(self):
@pytest.mark.parametrize("centre", [True, False])
def test_tree_seq(self, centre):
ts = binary_tree_seq()
trait_df = pd.DataFrame(
{
Expand All @@ -341,20 +359,20 @@ def test_tree_seq(self):
"trait_id": [0, 0, 0],
}
)

genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)

genetic_df = pd.DataFrame(
{
"trait_id": [0, 0],
"individual_id": [0, 1],
"genetic_value": [1, 22],
"genetic_value": [1, 22] - np.array([11, 11]) * centre,
}
)

pd.testing.assert_frame_equal(genetic_df, genetic_result, check_dtype=False)

def test_tree_seq_multiple_trait(self):
@pytest.mark.parametrize("centre", [True, False])
def test_tree_seq_multiple_trait(self, centre):
ts = binary_tree_seq()
trait_df = pd.DataFrame(
{
Expand All @@ -365,13 +383,13 @@ def test_tree_seq_multiple_trait(self):
}
)

genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df)
genetic_result = tstrait.genetic_value(ts=ts, trait_df=trait_df, centre=centre)

genetic_df = pd.DataFrame(
{
"trait_id": [0, 0, 1, 1],
"individual_id": [0, 1, 0, 1],
"genetic_value": [11, 12, 22, 24],
"genetic_value": [11, 12, 22, 24] - np.array([11, 11, 22, 22]) * centre,
}
)

Expand Down
29 changes: 25 additions & 4 deletions tests/test_simulate_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ def test_causal_sites_bad_input(self, sample_ts, sample_trait_model):
ts=sample_ts, causal_sites=[1, 5, 1], model=sample_trait_model, h2=-0.1
)

def test_bad_centre_input(self, sample_ts, sample_trait_model):
with pytest.raises(
TypeError,
match="centre must be a <class 'bool'> instance",
):
tstrait.sim_phenotype(
ts=sample_ts,
model=sample_trait_model,
centre="centre",
h2=0.3,
num_causal=5,
)


class TestModel:
@pytest.mark.parametrize(
Expand All @@ -104,7 +117,8 @@ class TestModel:
tstrait.trait_model(distribution="gamma", shape=1, scale=1),
],
)
def test_output(self, sample_ts, trait_model):
@pytest.mark.parametrize("centre", [True, False])
def test_output(self, sample_ts, trait_model, centre):
num_causal = 10
h2 = 0.3
alpha = -0.3
Expand All @@ -116,6 +130,7 @@ def test_output(self, sample_ts, trait_model):
h2=h2,
alpha=alpha,
random_seed=random_seed,
centre=centre,
)
trait_df = tstrait.sim_trait(
ts=sample_ts,
Expand All @@ -124,15 +139,18 @@ def test_output(self, sample_ts, trait_model):
alpha=alpha,
random_seed=random_seed,
)
genetic_df = tstrait.genetic_value(ts=sample_ts, trait_df=trait_df)
genetic_df = tstrait.genetic_value(
ts=sample_ts, trait_df=trait_df, centre=centre
)
phenotype_df = tstrait.sim_env(
genetic_df=genetic_df, h2=h2, random_seed=random_seed
)

pd.testing.assert_frame_equal(result.trait, trait_df)
pd.testing.assert_frame_equal(result.phenotype, phenotype_df)

def test_multivariate(self, sample_ts):
@pytest.mark.parametrize("centre", [True, False])
def test_multivariate(self, sample_ts, centre):
num_causal = 10
h2 = [0.1, 0.8]
alpha = -0.3
Expand All @@ -147,6 +165,7 @@ def test_multivariate(self, sample_ts):
h2=h2,
alpha=alpha,
random_seed=random_seed,
centre=centre,
)
trait_df = tstrait.sim_trait(
ts=sample_ts,
Expand All @@ -155,7 +174,9 @@ def test_multivariate(self, sample_ts):
alpha=alpha,
random_seed=random_seed,
)
genetic_df = tstrait.genetic_value(ts=sample_ts, trait_df=trait_df)
genetic_df = tstrait.genetic_value(
ts=sample_ts, trait_df=trait_df, centre=centre
)
phenotype_df = tstrait.sim_env(
genetic_df=genetic_df, h2=h2, random_seed=random_seed
)
Expand Down
Loading

0 comments on commit 8a782ad

Please sign in to comment.