ADA-SVR (4/4) Test for models #101

Open

Wants to merge 36 commits into base: main

Commits (36)
094955d
Remove unnecessary functions
lionelkusch Dec 13, 2024
b006a08
Update adaptive_permutation_threshold.py
lionelkusch Dec 13, 2024
23f7f64
Update adaptive_permutation_threshold.py
lionelkusch Dec 13, 2024
3a920ea
Update adaptive_permutation_threshold.py
lionelkusch Dec 16, 2024
470daae
Fix typo
lionelkusch Dec 18, 2024
88b8dad
Remove function _inverse_matrix because it's the same as np.linalg.pinv
lionelkusch Dec 18, 2024
20291c4
Fix typo
lionelkusch Dec 18, 2024
d063e93
Merge branch 'main' into PR_full_rank_manual_inversion
lionelkusch Dec 19, 2024
35a877f
Add documentation to ADA SVR
lionelkusch Dec 18, 2024
45d9992
Change name of the file
lionelkusch Dec 18, 2024
e28b8b8
fix bug in example
lionelkusch Dec 19, 2024
0a2fab4
Fix some errors in the Sphinx conf
lionelkusch Dec 19, 2024
02adbb5
Remove all warnings and errors when generating docstrings
lionelkusch Dec 19, 2024
358bd68
Format files
lionelkusch Dec 19, 2024
fd41d6c
Merge branch 'PR_comment_ADA-SVR' into PR_example_ADA_SVR
lionelkusch Dec 19, 2024
df80078
Include methods description in the examples
lionelkusch Dec 20, 2024
2a8f3c4
fix documentation
lionelkusch Dec 20, 2024
d3871f4
Add example for ADA-SVR
lionelkusch Dec 23, 2024
f868469
Add functions to get p-values and fix format and docstring
lionelkusch Dec 23, 2024
b1c14e1
Fix format
lionelkusch Dec 23, 2024
7cf0c4d
Add figure for ADA-SVR
lionelkusch Dec 23, 2024
71d407e
Add function for plotting elements
lionelkusch Dec 23, 2024
a530252
Fix typo
lionelkusch Dec 23, 2024
7ab4bfb
Remove unnecessary line
lionelkusch Dec 23, 2024
83f049e
Unnecessary option
lionelkusch Dec 23, 2024
5887b1b
Add a section in examples
lionelkusch Dec 26, 2024
05f47a7
Fix typo
lionelkusch Dec 26, 2024
e607676
Apply suggestions from code review
lionelkusch Dec 26, 2024
2e4037a
Fix include copyright figure
lionelkusch Dec 26, 2024
ca9575e
Fix format of the docstring
lionelkusch Dec 26, 2024
5c42cc3
Remove a comment of advantages
lionelkusch Dec 26, 2024
d73aeb9
Merge remote-tracking branch 'refs/remotes/origin/PR_example_ADA_SVR'…
lionelkusch Dec 26, 2024
ded4d51
Change folder for plotting result
lionelkusch Dec 26, 2024
ebad58c
Do not use examples as packages
lionelkusch Dec 26, 2024
64116e9
Change name of the file for methods
lionelkusch Dec 26, 2024
3a51e49
Add specific tests
lionelkusch Dec 23, 2024
1 change: 1 addition & 0 deletions doc_conf/api.rst
@@ -16,6 +16,7 @@ Functions
:toctree: generated/

ada_svr
ada_svr_pvalue
aggregate_quantiles
clustered_inference
data_simulation
10 changes: 6 additions & 4 deletions doc_conf/conf.py
@@ -90,7 +90,7 @@
# built documents.
#
# The short X.Y version.
from hidimstat._version import __version__ # noqa
from hidimstat import __version__

# The full version, including alpha/beta/rc tags.
release = __version__
@@ -217,9 +217,9 @@
"python": ("https://docs.python.org/3", None),
"numpy": ("https://numpy.org/devdocs", None),
"scipy": ("https://scipy.github.io/devdocs", None),
"matplotlib": ("https://matplotlib.org", None),
"matplotlib": ("https://matplotlib.org/stable/", None),
"sklearn": ("https://scikit-learn.org/stable", None),
"numba": ("https://numba.pydata.org/numba-doc/latest", None),
"numba": ("https://numba.readthedocs.io/en/stable/", None),
"joblib": ("https://joblib.readthedocs.io/en/latest", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
"seaborn": ("https://seaborn.pydata.org/", None),
@@ -228,7 +228,6 @@

examples_dirs = ["../examples"]
gallery_dirs = ["auto_examples"]
import mne

scrapers = ("matplotlib",)
try:
@@ -240,6 +239,7 @@
pass
if any(x in scrapers for x in ("pyvista",)):
from traits.api import push_exception_handler
import mne

push_exception_handler(reraise_exceptions=True)
report_scraper = mne.report._ReportScraper()
@@ -259,6 +259,8 @@
"abort_on_example_error": False,
"image_scrapers": scrapers,
"show_memory": True,
"filename_pattern": r"\.py",
"ignore_pattern": r"__init__\.py",
# 'reference_url': {
# 'numpy': 'http://docs.scipy.org/doc/numpy-1.9.1',
# 'scipy': 'http://docs.scipy.org/doc/scipy-0.17.0/reference',
19 changes: 9 additions & 10 deletions doc_conf/index.rst
@@ -46,8 +46,7 @@ is also needed to install ``pytest``.
Documentation & Examples
------------------------

Documentation about the main HiDimStat functions is available
`here <api.html>`_ and examples are available `here <auto_examples/index.html>`_.
The documentation of HiDimStat is composed of an `API <api.html>`_ and `examples <auto_examples/index.html>`_.

As of now, there are three different examples (Python scripts) that
illustrate how to use the main HiDimStat functions.
@@ -118,15 +117,15 @@ Application to source localization (MEG/EEG data):

Single/Group statistically validated importance using conditional permutations:

* Chamma, A., Thirion, B., & Engemann, D. (2024). **Variable importance in
high-dimensional settings requires grouping**. In Proceedings of the 38th
Conference of the Association for the Advancement of Artificial
Intelligence(AAAI 2024), Vancouver, Canada.
* Chamma, A., Thirion, B., & Engemann, D. (2024). Variable importance in
high-dimensional settings requires grouping. In Proceedings of the 38th
Conference of the Association for the Advancement of Artificial
Intelligence (AAAI 2024), Vancouver, Canada.

* Chamma, A., Engemann, D., & Thirion, B. (2023). **Statistically Valid Variable
Importance Assessment through Conditional Permutations**. In Proceedings of the
37th Conference on Neural Information Processing Systems (NeurIPS 2023), New
Orleans, USA.
* Chamma, A., Engemann, D., & Thirion, B. (2023). Statistically Valid Variable
Importance Assessment through Conditional Permutations. In Proceedings of the
37th Conference on Neural Information Processing Systems (NeurIPS 2023), New
Orleans, USA.

If you use our packages, we would appreciate citations to the relevant
aforementioned papers.
26 changes: 25 additions & 1 deletion doc_conf/references.bib
@@ -177,4 +177,28 @@ @article{liuFastPowerfulConditional2021
archiveprefix = {arxiv},
keywords = {Statistics - Methodology},
file = {/home/ahmad/Zotero/storage/8HRQZX3H/Liu et al. - 2021 - Fast and Powerful Conditional Randomization Testin.pdf;/home/ahmad/Zotero/storage/YFNDKN2B/2006.html}
}
}

@article{gaonkar_deriving_2012,
title = {Deriving statistical significance maps for {SVM} based image classification and group comparisons},
volume = {15},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3703958/},
abstract = {Population based pattern analysis and classification for quantifying structural and functional differences between diverse groups has been shown to be a powerful tool for the study of a number of diseases, and is quite commonly used especially in neuroimaging. The alternative to these pattern analysis methods, namely mass univariate methods such as voxel based analysis and all related methods, cannot detect multivariate patterns associated with group differences, and are not particularly suitable for developing individual-based diagnostic and prognostic biomarkers. A commonly used pattern analysis tool is the support vector machine ({SVM}). Unlike univariate statistical frameworks for morphometry, analytical tools for statistical inference are unavailable for the {SVM}. In this paper, we show that null distributions ordinarily obtained by permutation tests using {SVMs} can be analytically approximated from the data. The analytical computation takes a small fraction of the time it takes to do an actual permutation test, thereby rendering it possible to quickly create statistical significance maps derived from {SVMs}. Such maps are critical for understanding imaging patterns of group differences and interpreting which anatomical regions are important in determining the classifier's decision.},
pages = {723--730},
number = {0},
journaltitle = {Medical image computing and computer-assisted intervention : {MICCAI} ... International Conference on Medical Image Computing and Computer-Assisted Intervention},
journal = {Med Image Comput Comput Assist Interv},
author = {Gaonkar, Bilwaj and Davatzikos, Christos},
urldate = {2024-12-16},
year = {2012},
pmid = {23285616},
pmcid = {PMC3703958},
file = {PubMed Central Full Text PDF:/home/likusch/Zotero/storage/DX8QQAF5/Gaonkar and Davatzikos - 2012 - Deriving statistical significance maps for SVM based image classification and group comparisons.pdf:application/pdf},
}

@book{molnar2020interpretable,
title={Interpretable machine learning},
author={Molnar, Christoph},
year={2020},
publisher={Lulu.com}
}
2 changes: 1 addition & 1 deletion examples/README.txt
@@ -5,4 +5,4 @@ Examples Gallery

.. contents:: Contents
:local:
:depth: 3
:depth: 0
7 changes: 7 additions & 0 deletions examples/inference_model/README.txt
@@ -0,0 +1,7 @@
Description of the methods of the package
=========================================
The package contains the following methods:

.. contents:: Contents
:local:
:depth: 0
191 changes: 191 additions & 0 deletions examples/inference_model/ada_svr.py
@@ -0,0 +1,191 @@
"""
ADA-SVR: Adaptive Permutation Threshold Support Vector Regression
==================================================================
Statistical inference procedure presented in :footcite:t:`gaonkar_deriving_2012`.
"""

#############################################################################
# Imports needed for this script
# ------------------------------
import matplotlib.pyplot as plt
import numpy as np
from hidimstat.ada_svr import ada_svr, ada_svr_pvalue
from hidimstat.permutation_test import permutation_test
from sklearn.svm import SVR
from hidimstat.scenario import multivariate_1D_simulation
from hidimstat.visualisation.plot_dataset import (
plot_dataset1D,
plot_validate_variable_importance,
plot_pvalue_H0,
plot_pvalue_H1,
plot_compare_proba_estimated,
)

#############################################################################
# Generate toy dataset
# --------------------
#

# Parameters for the generation of data
n_samples, n_features = 20, 100
support_size = 1

X, y, beta, _ = multivariate_1D_simulation(
n_samples=n_samples,
n_features=n_features,
support_size=support_size,
sigma=0.1,
shuffle=False,
seed=42,
)
plot_dataset1D(X=X, y=y, beta=beta)
#############################################################################
# Usage of the method
# -------------------
# See the API for more details about the optional parameters:
# :py:func:`hidimstat.ada_svr`

beta_hat, scale = ada_svr(X, y)

#############################################################################
# | **beta_hat** is the estimated importance of each variable
# | **scale** is the standard deviation of the distribution of each coefficient

#############################################################################
# Plot the results
# ----------------
#
plot_validate_variable_importance(beta, beta_hat)

#############################################################################
# The result shows that the only important variable receives a clearly higher
# score, although the values of beta and beta_hat still differ noticeably.

#############################################################################
#
# The p-value and the corrected p-value help to assess the confidence in the
# previous results.


pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation = (
ada_svr_pvalue(beta_hat, scale)
)
plot_pvalue_H0(
beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation
)

#############################################################################
# The p-value and the corrected p-value show that the confidence in the
# important variable is high: both are close to one. We can conclude that the
# important variable is correctly detected in this case.

plot_pvalue_H1(
beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation
)
#############################################################################
# The results for the alternative hypothesis show that the confidence in the
# non-important variables is high. The 1-p-values are not significant; however,
# the corrected 1-p-values are close to one. We can conclude that the
# non-important variables are correctly identified in this case.
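
#############################################################################
# As a side note, a minimal sketch of one common family-wise correction
# (Bonferroni) is given below. It is an illustration only: the exact
# correction applied by ``ada_svr_pvalue`` may differ.

# illustrative only: scale each p-value by the number of tests and cap at 1
pvalue_bonferroni = np.minimum(pvalue * n_features, 1.0)
print("Smallest Bonferroni-corrected p-value: {:.4f}".format(pvalue_bonferroni.min()))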

#############################################################################
#
# Principle of the methods
# ------------------------
# The ADA-SVR method is a statistical inference procedure that estimates the
# importance of each variable for a Support Vector Regression (SVR).
# The method is a simplification of the permutation test for SVR
# (see :py:func:`hidimstat.permutation_test`).
# The principle of the permutation test is to shuffle the target variable and
# to refit the SVR in order to estimate the distribution of its coefficients.
# ADA-SVR instead assumes that this distribution is a normal distribution
# centred around zero and uses the central limit theorem to estimate the
# standard deviation of this distribution for each coefficient
# (for details, see figure 1 of :footcite:ct:`Gaonkar et al. 2012 <gaonkar_deriving_2012>`).
#
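
#############################################################################
# To make the analytical shortcut concrete, here is a minimal, self-contained
# sketch of the idea (an illustration only, not the ``hidimstat``
# implementation: it assumes every sample acts as a support vector, ignores
# the intercept term, and the exact scaling used by ADA-SVR may differ).

# approximate the linear-SVR weights as a linear map of the target: beta ~ C @ y
K_sketch = np.linalg.pinv(X @ X.T)  # (n_samples, n_samples)
C_sketch = X.T @ K_sketch  # (n_features, n_samples)
beta_sketch = C_sketch @ y  # rough estimate of the SVR weights
# under permutations of y, the central limit theorem gives each coefficient an
# approximately normal null distribution whose spread follows the row norms of C
scale_sketch = np.linalg.norm(C_sketch, axis=1) * np.std(y)
print("Maximum absolute sketched weight: {:.4f}".format(np.abs(beta_sketch).max()))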

#############################################################################
# Comparison with the permutation test for SVR for validating the approach
# ------------------------------------------------------------------------
#
estimator = SVR(kernel="linear", epsilon=0.0, gamma="scale", C=1.0)
estimator.fit(X, y)
beta_hat_svr = estimator.coef_

# check that the coefficients are the same as those of the fitted SVR
assert np.max(np.abs(beta_hat - beta_hat_svr.T[:, 0])) < 1e-4

#############################################################################
# The coefficients of the SVR provide an estimation of the importance of the
# variables. To estimate the confidence interval of this importance, ADA-SVR
# proposes to estimate the distribution of the SVR coefficients analytically,
# instead of using the classical permutation test for this estimation.

#############################################################################
# Estimation of the distribution of the coefficients of SVR
# ---------------------------------------------------------
#

proba = permutation_test(
X, y, estimator=estimator, n_permutations=10000, n_jobs=8, seed=42, proba=True
)
plot_compare_proba_estimated(proba, beta_hat, scale)

#############################################################################
# **Compare the distribution of the coefficients of SVR with the
# estimation of the distribution of the coefficients by ADA-SVR**
print(
"ADA-SVR assumes that the normal distribution of the coefficients is",
"centred around zero.\n",
"Our estimation is that the maximum deviation is: {:.4f}\n".format(
np.max(np.abs(np.mean(proba, axis=0)))
),
"ADA-SVR provides the standard deviation of the normal distribution",
"for each coefficient.\n",
"The maximum relative difference between the ADA-SVR estimation and our",
"estimation is: {:.4f}".format(np.max(np.abs(scale - np.std(proba, axis=0)) / scale)),
)

#############################################################################
# Assumptions, Advantages and Disadvantages
# -----------------------------------------
#
# **Assumptions**:
#
# - The distribution of the coefficients of SVR is normal and centred around zero.
# - The method is valid for large sample sizes.
# - The method relies on the usual linear-model assumptions: linearity, normality,
#   homoscedasticity, independence, fixed features, absence of multicollinearity
#   (see the book of :footcite:ct:`Molnar 2020<molnar2020interpretable>`
#   for details).
#
# **Advantages**:
#
# - The method is fast because it uses the central limit theorem to estimate
#   the standard deviation of the distribution of the coefficients of SVR.
# - The method has the advantages of linear models: transparency of the
#   prediction, a high level of collective experience and expertise,
#   and a guarantee of convergence (see the book of
#   :footcite:ct:`Molnar 2020<molnar2020interpretable>` for details).
#
# **Disadvantages**:
#
# - The method assumes that the distribution of the coefficients of SVR is
#   normal and centred around zero.
# - The method is not valid for small sample sizes.
# - The method has the disadvantages of linear models: it only captures linear
#   relationships, it has weaker predictive performance, and its coefficients
#   can be unintuitive (see the book of
#   :footcite:ct:`Molnar 2020<molnar2020interpretable>` for details).
#
# **Conclusion**:
#
# The method is a good alternative to the permutation test for SVR when the
# distribution of the coefficients of SVR is normal and centred around zero,
# since it is a much faster simplification of that test.


#############################################################################
# References
# ----------
# .. footbibliography::
16 changes: 8 additions & 8 deletions examples/plot_diabetes_variable_importance_example.py
@@ -61,13 +61,13 @@

#############################################################################
# Load the diabetes dataset
# ------------------------------
# -------------------------
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

#############################################################################
# Fit a baseline model on the diabetes dataset
# ------------------------------
# --------------------------------------------
# We use a Ridge regression model with a 10-fold cross-validation to fit the
# diabetes dataset.

@@ -88,7 +88,7 @@
print(f"Fold {i}: {mse}")
#############################################################################
# Fit a baseline model on the diabetes dataset
# ------------------------------
# --------------------------------------------
# We use a Ridge regression model with a 10-fold cross-validation to fit the
# diabetes dataset.

@@ -110,7 +110,7 @@

#############################################################################
# Measure the importance of variables using the CPI method
# ------------------------------
# --------------------------------------------------------

cpi_importance_list = []
for i, (train_index, test_index) in enumerate(kf.split(X)):
@@ -131,7 +131,7 @@

#############################################################################
# Measure the importance of variables using the LOCO method
# ------------------------------
# ---------------------------------------------------------

loco_importance_list = []

@@ -151,7 +151,7 @@

#############################################################################
# Measure the importance of variables using the permutation method
# ------------------------------
# ----------------------------------------------------------------

pi_importance_list = []

@@ -172,7 +172,7 @@

#############################################################################
# Define a function to compute the p-value from importance values
# ------------------------------
# ---------------------------------------------------------------
def compute_pval(vim):
mean_vim = np.mean(vim, axis=0)
std_vim = np.std(vim, axis=0)
@@ -182,7 +182,7 @@ def compute_pval(vim):

#############################################################################
# Analyze the results
# ------------------------------
# -------------------


cpi_vim_arr = np.array([x["importance"] for x in cpi_importance_list]) / 2