From 46b87a55fdaf55c9e86d5dbed4c7c6d77e4ffb8f Mon Sep 17 00:00:00 2001 From: "Julio A. Peraza" <52050407+JulioAPeraza@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:09:49 -0400 Subject: [PATCH] Support precomputed correlation matrix for calculating variance inflation term in Stouffers (#121) * Support precomputed correlation matrix for calculating variance inflation term in Stouffers * Update test_results.py * Switch to macos-12 per https://github.com/actions/setup-python/issues/850 * Update test_results.py --- .github/workflows/testing.yml | 2 +- pymare/estimators/combination.py | 28 +++++++++++++++++++------- pymare/tests/test_combination_tests.py | 25 +++++++++++++++++++++++ pymare/tests/test_results.py | 3 ++- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index c792186..8d9804f 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -38,7 +38,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macos-latest"] + os: ["ubuntu-latest", "macos-12"] python-version: ["3.8", "3.9", "3.10", "3.11"] name: ${{ matrix.os }} with Python ${{ matrix.python-version }} diff --git a/pymare/estimators/combination.py b/pymare/estimators/combination.py index 6f9f41a..5fd4fc4 100644 --- a/pymare/estimators/combination.py +++ b/pymare/estimators/combination.py @@ -113,7 +113,7 @@ class StoufferCombinationTest(CombinationTest): # Maps Dataset attributes onto fit() args; see BaseEstimator for details. _dataset_attr_map = {"z": "y", "w": "n", "g": "v"} - def _inflation_term(self, z, w, g): + def _inflation_term(self, z, w, g, corr=None): """Calculate the variance inflation term for each group. This term is used to adjust the variance of the combined z-score when @@ -127,6 +127,8 @@ def _inflation_term(self, z, w, g): Array of weights. g : :obj:`numpy.ndarray` of shape (n, d) Array of group labels. + corr : :obj:`numpy.ndarray` of shape (n, n), optional + The correlation matrix of the z-values. If None, it will be calculated. Returns ------- @@ -157,26 +159,38 @@ def _inflation_term(self, z, w, g): continue # Calculate the within group correlation matrix and sum the non-diagonal elements - corr = np.corrcoef(group_z, rowvar=True) + if corr is None: + if z.shape[1] < 2: + raise ValueError("The number of features must be greater than 1.") + group_corr = np.corrcoef(group_z, rowvar=True) + else: + group_corr = corr[group_indices][:, group_indices] + upper_indices = np.triu_indices(n_samples, k=1) - non_diag_corr = corr[upper_indices] + non_diag_corr = group_corr[upper_indices] w_i, w_j = weights[upper_indices[0]], weights[upper_indices[1]] sigma += (2 * w_i * w_j * non_diag_corr).sum() return sigma - def fit(self, z, w=None, g=None): + def fit(self, z, w=None, g=None, corr=None): """Fit the estimator to z-values, optionally with weights and groups.""" - return super().fit(z, w=w, g=g) + return super().fit(z, w=w, g=g, corr=corr) - def p_value(self, z, w=None, g=None): + def p_value(self, z, w=None, g=None, corr=None): """Calculate p-values.""" if w is None: w = np.ones_like(z) + if g is None and corr is not None: + warnings.warn("Correlation matrix provided without groups. Ignoring.") + + if g is not None and corr is not None and g.shape[0] != corr.shape[0]: + raise ValueError("Group labels must have the same length as the correlation matrix.") + # Calculate the variance inflation term, sum of non-diagonal elements of sigma. - sigma = self._inflation_term(z, w, g) if g is not None else 0 + sigma = self._inflation_term(z, w, g, corr=corr) if g is not None else 0 # The sum of diagonal elements of sigma is given by (w**2).sum(0). variance = (w**2).sum(0) + sigma diff --git a/pymare/tests/test_combination_tests.py b/pymare/tests/test_combination_tests.py index 9f5f7e1..ac6e956 100644 --- a/pymare/tests/test_combination_tests.py +++ b/pymare/tests/test_combination_tests.py @@ -79,3 +79,28 @@ def test_stouffer_adjusted(): sigma_l1 = n_maps_l1 * (n_maps_l1 - 1) # Expected inflation term z_expected_l1 = n_maps_l1 * common_sample / np.sqrt(n_maps_l1 + sigma_l1) assert np.allclose(z_l1, z_expected_l1, atol=1e-5) + + # Test with correlation matrix and groups. + data_corr = data - data.mean(0) + corr = np.corrcoef(data_corr, rowvar=True) + results_corr = ( + StoufferCombinationTest("directed").fit(z=data, w=weights, g=groups, corr=corr).params_ + ) + z_corr = ss.norm.isf(results_corr["p"]) + + z_corr_expected = np.array([5.00088912, 3.70356943, 4.05465924, 5.4633001, 5.18927878]) + assert np.allclose(z_corr, z_corr_expected, atol=1e-5) + + # Test with no correlation matrix and groups, but only one feature. + with pytest.raises(ValueError): + StoufferCombinationTest("directed").fit(z=data[:, :1], w=weights[:, :1], g=groups) + + # Test with correlation matrix and groups of different shapes. + with pytest.raises(ValueError): + StoufferCombinationTest("directed").fit(z=data, w=weights, g=groups, corr=corr[:-2, :-2]) + + # Test with correlation matrix and no groups. + results1 = StoufferCombinationTest("directed").fit(z=_z1, corr=corr).params_ + z1 = ss.norm.isf(results1["p"]) + + assert np.allclose(z1, [4.69574], atol=1e-5) diff --git a/pymare/tests/test_results.py b/pymare/tests/test_results.py index 5b00742..edaa92e 100644 --- a/pymare/tests/test_results.py +++ b/pymare/tests/test_results.py @@ -87,8 +87,9 @@ def test_combination_test_results_from_arrays(dataset): # fit overwrites dataset_ attribute with None assert fitted_estimator.dataset_ is None + # fit_dataset overwrites it with the Dataset - fitted_estimator.fit_dataset(dataset) + fitted_estimator.fit_dataset(Dataset(dataset.y)) assert isinstance(fitted_estimator.dataset_, Dataset) # fit sets it back to None fitted_estimator.fit(z=dataset.y)