From d88dc7eb77cb0a831d87f10d21d37c2887811e09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20Hennh=C3=B6fer?= Date: Mon, 30 Dec 2024 12:17:16 +0100 Subject: [PATCH] Minor Refactoring Updated README.md --- README.md | 71 ++++++++++-------- examples/abod.py | 4 +- examples/autoencoder.py | 4 +- examples/cd.py | 4 +- examples/copod.py | 4 +- examples/dif.py | 4 +- examples/ecod.py | 4 +- examples/gmm.py | 11 +-- examples/hbos.py | 4 +- examples/iforest.py | 12 ++- examples/inne.py | 4 +- examples/kde.py | 4 +- examples/knn.py | 4 +- examples/knn_mahalanobis.py | 4 +- examples/kpca.py | 4 +- examples/lmdd.py | 4 +- examples/loci.py | 4 +- examples/loda.py | 4 +- examples/lof.py | 4 +- examples/lscp.py | 4 +- examples/lunar.py | 4 +- examples/mad.py | 4 +- examples/mcd.py | 4 +- examples/ocsvm.py | 4 +- examples/pca.py | 4 +- examples/qmcd.py | 4 +- examples/rod.py | 4 +- examples/sod.py | 4 +- pyproject.toml | 2 +- tests/test_bootstrap.py | 6 +- tests/test_cross_val.py | 6 +- tests/test_jackknife.py | 7 +- tests/test_split.py | 5 +- tests/unit/test_utils_aggregation.py | 2 +- unquad/{utils => }/data/__init__.py | 0 unquad/{utils => }/data/loader.py | 2 +- .../data/parquet/breast/breast.parquet | Bin .../data/parquet/fraud/fraud.parquet | Bin .../parquet/ionosphere/ionosphere.parquet | Bin .../parquet/mammography/mammography.parquet | Bin .../data/parquet/musk/musk.parquet | Bin .../data/parquet/shuttle/shuttle.parquet | Bin .../data/parquet/thyroid/thyroid.parquet | Bin .../{utils => }/data/parquet/wbc/wbc.parquet | Bin unquad/estimator/configuration.py | 6 +- unquad/estimator/detector.py | 2 +- unquad/estimator/parameter.py | 2 +- unquad/utils/aggregation.py | 4 +- .../performance.py => decorator.py} | 0 unquad/utils/decorator/__init__.py | 0 unquad/utils/enums/__init__.py | 0 unquad/utils/enums/adjustment.py | 21 ------ unquad/utils/enums/aggregation.py | 23 ------ unquad/utils/enums/dataset.py | 30 -------- unquad/utils/enums/strategy.py | 28 ------- 
.../{error/forbidden_model.py => error.py} | 0 unquad/utils/error/__init__.py | 0 unquad/utils/metrics.py | 2 +- unquad/utils/multiplicity.py | 12 +-- unquad/utils/statistical.py | 2 +- 60 files changed, 128 insertions(+), 228 deletions(-) rename unquad/{utils => }/data/__init__.py (100%) rename unquad/{utils => }/data/loader.py (98%) rename unquad/{utils => }/data/parquet/breast/breast.parquet (100%) rename unquad/{utils => }/data/parquet/fraud/fraud.parquet (100%) rename unquad/{utils => }/data/parquet/ionosphere/ionosphere.parquet (100%) rename unquad/{utils => }/data/parquet/mammography/mammography.parquet (100%) rename unquad/{utils => }/data/parquet/musk/musk.parquet (100%) rename unquad/{utils => }/data/parquet/shuttle/shuttle.parquet (100%) rename unquad/{utils => }/data/parquet/thyroid/thyroid.parquet (100%) rename unquad/{utils => }/data/parquet/wbc/wbc.parquet (100%) rename unquad/utils/{decorator/performance.py => decorator.py} (100%) delete mode 100644 unquad/utils/decorator/__init__.py delete mode 100644 unquad/utils/enums/__init__.py delete mode 100644 unquad/utils/enums/adjustment.py delete mode 100644 unquad/utils/enums/aggregation.py delete mode 100644 unquad/utils/enums/dataset.py delete mode 100644 unquad/utils/enums/strategy.py rename unquad/utils/{error/forbidden_model.py => error.py} (100%) delete mode 100644 unquad/utils/error/__init__.py diff --git a/README.md b/README.md index e87a471..e2aa3cf 100644 --- a/README.md +++ b/README.md @@ -3,36 +3,35 @@ [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/unquad) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -**unquad** is a wrapper applicable for most [*PyOD*](https://pyod.readthedocs.io/en/latest/) detectors (see [Supported Estimators](#supported-estimators)) for +**unquad** is a wrapper 
applicable for most [*PyOD*](https://pyod.readthedocs.io/en/latest/) detectors (see [Supported Estimators](#supported-estimators)) enabling **uncertainty-quantified anomaly detection** based on one-class classification and the principles of **conformal inference**. ```sh pip install unquad ``` +Mind the **optional dependencies** for using deep learning models or the built-in datasets (see [pyproject.toml](https://github.com/OliverHennhoefer/unquad/blob/main/pyproject.toml)). + ## What is *Conformal Anomaly Detection*? -[*Conformal Anomaly Detection*](https://www.diva-portal.org/smash/get/diva2:690997/FULLTEXT02.pdf) (CAD) is based on the -model-agnostic and non-parametric framework of [*conformal prediction*](https://en.wikipedia.org/wiki/Conformal_prediction#:~:text=Conformal%20prediction%20(CP)%20is%20a,assuming%20exchangeability%20of%20the%20data.) (CP). -While CP aims to produce statistically valid prediction regions (*prediction intervals* or *prediction sets*) for any -given point predictor or classifier, CAD aims to control statistical metrics, like the [*false discovery rate*](https://en.wikipedia.org/wiki/False_discovery_rate), -for a given anomaly detector suitable for one-class classification – without overly compromising on its -[*statistical power*](https://en.wikipedia.org/wiki/Power_of_a_test). +[![start with why](https://img.shields.io/badge/start%20with-why%3F-brightgreen.svg?style=flat)](https://www.diva-portal.org/smash/get/diva2:690997/FULLTEXT02.pdf) -In essence, CAD translates anomaly scores into statistical p-values by comparing anomaly scores observed on test data to a retained set of calibration -scores as previously obtained for normal data during the model training stage. -The larger the discrepancy between *normal* scores and observed test scores, the lower the obtained (and **statistically valid**) p-value. 
-The p-values, instead of the usual anomaly estimates, allow, e.g., for FDR control by statistical procedures like *Benjamini-Hochberg*. +[*Conformal Anomaly Detection*](https://www.diva-portal.org/smash/get/diva2:690997/FULLTEXT02.pdf) (CAD) applies the principles of conformal inference ([*conformal prediction*](https://en.wikipedia.org/wiki/Conformal_prediction#:~:text=Conformal%20prediction%20(CP)%20is%20a,assuming%20exchangeability%20of%20the%20data.)) to anomaly detection. +*Conformal Anomaly Detection* focuses on controlling error metrics like the [*false discovery rate*](https://en.wikipedia.org/wiki/False_discovery_rate), while maintaining [*statistical power*](https://en.wikipedia.org/wiki/Power_of_a_test). +CAD converts anomaly scores to _p_-values by comparing test data scores against calibration scores from normal training data. +The resulting _p_-value of the test score(s) is computed as the normalized rank among the calibration scores. +These **statistically valid** _p_-values enable error control through methods like *Benjamini-Hochberg*, replacing traditional anomaly estimates that lack any kind of statistical guarantee. ### Usage: Split-Conformal (Inductive Approach) +Using the default behavior of `ConformalDetector()` with default `DetectorConfig()`. 
+ ```python from pyod.models.gmm import GMM -from unquad.utils.data.loader import DataLoader -from unquad.utils.enums.dataset import Dataset -from unquad.estimator.configuration import DetectorConfig +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal from unquad.utils.metrics import false_discovery_rate, statistical_power @@ -42,8 +41,7 @@ x_train, x_test, y_test = dl.get_example_setup(random_state=1) ce = ConformalDetector( detector=GMM(), - strategy=SplitConformal(calib_size=1_000), - config=DetectorConfig(alpha=0.05), + strategy=SplitConformal(calib_size=1_000) ) ce.fit(x_train) @@ -54,23 +52,38 @@ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=estimates)}") ``` Output: +```text +Empirical FDR: 0.108 +Empirical Power: 0.892 +``` + +The behavior can be customized by changing the `DetectorConfig()`: + ```python -Empirical FDR: 0.03 -Empirical Power: 0.97 +@dataclass +class DetectorConfig: + alpha: float = 0.2 # Nominal FDR value + adjustment: Adjustment = Adjustment.BH # Multiple Testing Procedure + aggregation: Aggregation = Aggregation.MEDIAN # Score Aggregation (if necessary) + seed: int = 1 + silent: bool = True ``` ### Usage: Bootstrap-after-Jackknife+ (JaB+) +Using `ConformalDetector()` with customized `DetectorConfig()`. +The `BootstrapConformal()` strategy allows setting two of the three parameters `resampling_ratio`, `n_bootstraps` and `n_calib`. +For any such combination, the remaining parameter will be filled automatically. This allows exact control of the +calibration procedure when using a bootstrap strategy. 
+ ```python from pyod.models.iforest import IForest -from unquad.utils.data.loader import DataLoader -from unquad.utils.enums.dataset import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.aggregation import Aggregation -from unquad.utils.enums.adjustment import Adjustment +from unquad.utils.enums import Aggregation, Adjustment, Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power dl = DataLoader(dataset=Dataset.SHUTTLE) @@ -79,9 +92,7 @@ x_train, x_test, y_test = dl.get_example_setup(random_state=1) ce = ConformalDetector( detector=IForest(behaviour="new"), strategy=BootstrapConformal(resampling_ratio=0.99, n_bootstraps=20, plus=True), - config=DetectorConfig(alpha=0.1, - adjustment=Adjustment.BENJAMINI_HOCHBERG, - aggregation=Aggregation.MEAN), + config=DetectorConfig(alpha=0.1, adjustment=Adjustment.BY, aggregation=Aggregation.MEAN), ) ce.fit(x_train) @@ -92,15 +103,15 @@ print(f"Empirical Power: {statistical_power(y=y_test, y_hat=estimates)}") ``` Output: -```python -Empirical FDR: 0.067 -Empirical Power: 0.933 +```text +Empirical FDR: 0.0 +Empirical Power: 1.0 ``` ### Supported Estimators -The package currently supports anomaly estimators that are suitable for unsupervised one-class classification. As respective -detectors are therefore exclusively fitted on *normal* (or *non-anomalous*) data, parameters like *threshold* are therefore internally +The package only supports anomaly estimators that are suitable for unsupervised one-class classification. As respective +detectors are therefore exclusively fitted on *normal* (or *non-anomalous*) data, parameters like *threshold* are internally set to the smallest possible values. 
Models that are **currently supported** include: diff --git a/examples/abod.py b/examples/abod.py index d6b9be1..2186f34 100644 --- a/examples/abod.py +++ b/examples/abod.py @@ -1,9 +1,9 @@ from pyod.models.abod import ABOD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/autoencoder.py b/examples/autoencoder.py index bd1c0f8..0e77d82 100644 --- a/examples/autoencoder.py +++ b/examples/autoencoder.py @@ -1,10 +1,10 @@ from pyod.models.auto_encoder_torch import AutoEncoder -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/cd.py b/examples/cd.py index 8fd308e..4c37019 100644 --- a/examples/cd.py +++ b/examples/cd.py @@ -1,9 +1,9 @@ from pyod.models.cd import CD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/copod.py b/examples/copod.py index d2f9323..bd048a2 100644 --- a/examples/copod.py +++ b/examples/copod.py @@ -1,9 +1,9 @@ 
from pyod.models.copod import COPOD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.jackknife import JackknifeConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/dif.py b/examples/dif.py index 5c150a2..bf4f17c 100644 --- a/examples/dif.py +++ b/examples/dif.py @@ -1,9 +1,9 @@ from pyod.models.dif import DIF -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/ecod.py b/examples/ecod.py index 25567c7..6690170 100644 --- a/examples/ecod.py +++ b/examples/ecod.py @@ -1,9 +1,9 @@ from pyod.models.ecod import ECOD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.jackknife import JackknifeConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/gmm.py b/examples/gmm.py index 2e36e4d..5596be5 100644 --- a/examples/gmm.py +++ b/examples/gmm.py @@ -1,8 +1,7 @@ from pyod.models.gmm import GMM -from unquad.utils.data.loader import DataLoader -from unquad.utils.enums.dataset import Dataset -from unquad.estimator.configuration import DetectorConfig +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector 
import ConformalDetector from unquad.strategy.split import SplitConformal from unquad.utils.metrics import false_discovery_rate, statistical_power @@ -11,11 +10,7 @@ dl = DataLoader(dataset=Dataset.SHUTTLE) x_train, x_test, y_test = dl.get_example_setup(random_state=1) - ce = ConformalDetector( - detector=GMM(), - strategy=SplitConformal(calib_size=1_000), - config=DetectorConfig(alpha=0.05), - ) + ce = ConformalDetector(detector=GMM(), strategy=SplitConformal(calib_size=1_000)) ce.fit(x_train) estimates = ce.predict(x_test) diff --git a/examples/hbos.py b/examples/hbos.py index b700d78..43b4919 100644 --- a/examples/hbos.py +++ b/examples/hbos.py @@ -1,9 +1,9 @@ from pyod.models.hbos import HBOS -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/iforest.py b/examples/iforest.py index e483295..1a2f656 100644 --- a/examples/iforest.py +++ b/examples/iforest.py @@ -1,12 +1,12 @@ from pyod.models.iforest import IForest -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.aggregation import Aggregation -from unquad.utils.enums.adjustment import Adjustment -from unquad.utils.enums.dataset import Dataset +from unquad.utils.enums import Aggregation +from unquad.utils.enums import Adjustment from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": @@ -17,9 +17,7 @@ 
detector=IForest(behaviour="new"), strategy=BootstrapConformal(resampling_ratio=0.99, n_bootstraps=20, plus=True), config=DetectorConfig( - alpha=0.1, - adjustment=Adjustment.BENJAMINI_HOCHBERG, - aggregation=Aggregation.MEAN, + alpha=0.1, adjustment=Adjustment.BY, aggregation=Aggregation.MEAN ), ) diff --git a/examples/inne.py b/examples/inne.py index 19fe0f3..bd20a91 100644 --- a/examples/inne.py +++ b/examples/inne.py @@ -1,9 +1,9 @@ from pyod.models.inne import INNE -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/kde.py b/examples/kde.py index ad72651..c1d1a90 100644 --- a/examples/kde.py +++ b/examples/kde.py @@ -1,10 +1,10 @@ from pyod.models.kde import KDE -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/knn.py b/examples/knn.py index 01bcf6a..25ab45b 100644 --- a/examples/knn.py +++ b/examples/knn.py @@ -1,10 +1,10 @@ from pyod.models.knn import KNN -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal -from unquad.utils.enums.dataset import Dataset 
from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/knn_mahalanobis.py b/examples/knn_mahalanobis.py index a590dc9..1a406de 100644 --- a/examples/knn_mahalanobis.py +++ b/examples/knn_mahalanobis.py @@ -2,11 +2,11 @@ from pyod.models.knn import KNN -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/kpca.py b/examples/kpca.py index c615610..986d62f 100644 --- a/examples/kpca.py +++ b/examples/kpca.py @@ -1,9 +1,9 @@ from pyod.models.kpca import KPCA -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/lmdd.py b/examples/lmdd.py index 6245c72..036ef7a 100644 --- a/examples/lmdd.py +++ b/examples/lmdd.py @@ -1,10 +1,10 @@ from pyod.models.lmdd import LMDD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/loci.py 
b/examples/loci.py index 7ec4fbf..f6ca52f 100644 --- a/examples/loci.py +++ b/examples/loci.py @@ -1,10 +1,10 @@ from pyod.models.loci import LOCI -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/loda.py b/examples/loda.py index 6a75fef..8f1717a 100644 --- a/examples/loda.py +++ b/examples/loda.py @@ -1,9 +1,9 @@ from pyod.models.loda import LODA -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/lof.py b/examples/lof.py index 7f9ac2b..82b9864 100644 --- a/examples/lof.py +++ b/examples/lof.py @@ -1,9 +1,9 @@ from pyod.models.lof import LOF -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.jackknife import JackknifeConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/lscp.py b/examples/lscp.py index 48004cd..1fbbe00 100644 --- a/examples/lscp.py +++ b/examples/lscp.py @@ -1,10 +1,10 @@ from pyod.models.lscp import LSCP from pyod.models.pca import PCA -from unquad.utils.data.loader import DataLoader +from 
unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/lunar.py b/examples/lunar.py index 6aae740..84779a4 100644 --- a/examples/lunar.py +++ b/examples/lunar.py @@ -1,10 +1,10 @@ from pyod.models.lunar import LUNAR -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/mad.py b/examples/mad.py index 0ac000d..63ea03c 100644 --- a/examples/mad.py +++ b/examples/mad.py @@ -1,10 +1,10 @@ from pyod.models.mad import MAD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/mcd.py b/examples/mcd.py index 557ea8a..3b6db14 100644 --- a/examples/mcd.py +++ b/examples/mcd.py @@ -1,10 +1,10 @@ from pyod.models.mcd import MCD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from 
unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/ocsvm.py b/examples/ocsvm.py index 950fb67..256c7ed 100644 --- a/examples/ocsvm.py +++ b/examples/ocsvm.py @@ -1,10 +1,10 @@ from pyod.models.ocsvm import OCSVM -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.jackknife import JackknifeConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/pca.py b/examples/pca.py index 5e585b3..d78576b 100644 --- a/examples/pca.py +++ b/examples/pca.py @@ -1,10 +1,10 @@ from pyod.models.pca import PCA -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.jackknife import JackknifeConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/qmcd.py b/examples/qmcd.py index 09e9497..37434e6 100644 --- a/examples/qmcd.py +++ b/examples/qmcd.py @@ -1,9 +1,9 @@ from pyod.models.qmcd import QMCD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from 
unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/rod.py b/examples/rod.py index 5d49234..07fa053 100644 --- a/examples/rod.py +++ b/examples/rod.py @@ -1,10 +1,10 @@ from pyod.models.rod import ROD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.bootstrap import BootstrapConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/examples/sod.py b/examples/sod.py index ab96bd7..48a4d73 100644 --- a/examples/sod.py +++ b/examples/sod.py @@ -1,10 +1,10 @@ from pyod.models.sod import SOD -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.configuration import DetectorConfig from unquad.estimator.detector import ConformalDetector from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset from unquad.utils.metrics import false_discovery_rate, statistical_power if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 5c96196..5e56ac5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "unquad" -version = "0.1.4" +version = "0.1.5" description = "Conformal Anomaly Detection" authors = [ { name = "Oliver Hennhoefer", email = "oliver.hennhoefer@mail.de" }, diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index ce726b9..ed4e44d 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -2,10 +2,10 @@ from pyod.models.iforest import IForest -from unquad.strategy.bootstrap import BootstrapConformal -from 
unquad.utils.enums.dataset import Dataset -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector +from unquad.strategy.bootstrap import BootstrapConformal from unquad.utils.metrics import false_discovery_rate, statistical_power diff --git a/tests/test_cross_val.py b/tests/test_cross_val.py index f25e9af..89badb3 100644 --- a/tests/test_cross_val.py +++ b/tests/test_cross_val.py @@ -2,10 +2,10 @@ from pyod.models.iforest import IForest -from unquad.strategy.cross_val import CrossValidationConformal -from unquad.utils.enums.dataset import Dataset -from unquad.utils.data.loader import DataLoader +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector +from unquad.strategy.cross_val import CrossValidationConformal from unquad.utils.metrics import false_discovery_rate, statistical_power diff --git a/tests/test_jackknife.py b/tests/test_jackknife.py index c3b6de2..fe9ac89 100644 --- a/tests/test_jackknife.py +++ b/tests/test_jackknife.py @@ -2,10 +2,11 @@ from pyod.models.iforest import IForest -from unquad.strategy.jackknife import JackknifeConformal -from unquad.utils.enums.dataset import Dataset -from unquad.utils.data.loader import DataLoader + +from unquad.utils.enums import Dataset +from unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector +from unquad.strategy.jackknife import JackknifeConformal from unquad.utils.metrics import false_discovery_rate, statistical_power diff --git a/tests/test_split.py b/tests/test_split.py index fa4266c..ee28019 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -2,8 +2,9 @@ from pyod.models.iforest import IForest -from unquad.utils.enums.dataset import Dataset -from unquad.utils.data.loader import DataLoader + +from unquad.utils.enums import Dataset +from 
unquad.data.loader import DataLoader from unquad.estimator.detector import ConformalDetector from unquad.strategy.split import SplitConformal from unquad.utils.metrics import false_discovery_rate, statistical_power diff --git a/tests/unit/test_utils_aggregation.py b/tests/unit/test_utils_aggregation.py index ada0692..4c89409 100644 --- a/tests/unit/test_utils_aggregation.py +++ b/tests/unit/test_utils_aggregation.py @@ -2,7 +2,7 @@ from unquad.utils.aggregation import aggregate -from unquad.utils.enums.aggregation import Aggregation +from unquad.utils.enums import Aggregation class TestUtilsAggregation(unittest.TestCase): diff --git a/unquad/utils/data/__init__.py b/unquad/data/__init__.py similarity index 100% rename from unquad/utils/data/__init__.py rename to unquad/data/__init__.py diff --git a/unquad/utils/data/loader.py b/unquad/data/loader.py similarity index 98% rename from unquad/utils/data/loader.py rename to unquad/data/loader.py index bcd4dbb..764d224 100644 --- a/unquad/utils/data/loader.py +++ b/unquad/data/loader.py @@ -3,7 +3,7 @@ from pathlib import Path from sklearn.model_selection import train_test_split -from unquad.utils.enums.dataset import Dataset +from unquad.utils.enums import Dataset class DataLoader: diff --git a/unquad/utils/data/parquet/breast/breast.parquet b/unquad/data/parquet/breast/breast.parquet similarity index 100% rename from unquad/utils/data/parquet/breast/breast.parquet rename to unquad/data/parquet/breast/breast.parquet diff --git a/unquad/utils/data/parquet/fraud/fraud.parquet b/unquad/data/parquet/fraud/fraud.parquet similarity index 100% rename from unquad/utils/data/parquet/fraud/fraud.parquet rename to unquad/data/parquet/fraud/fraud.parquet diff --git a/unquad/utils/data/parquet/ionosphere/ionosphere.parquet b/unquad/data/parquet/ionosphere/ionosphere.parquet similarity index 100% rename from unquad/utils/data/parquet/ionosphere/ionosphere.parquet rename to unquad/data/parquet/ionosphere/ionosphere.parquet diff --git 
a/unquad/utils/data/parquet/mammography/mammography.parquet b/unquad/data/parquet/mammography/mammography.parquet similarity index 100% rename from unquad/utils/data/parquet/mammography/mammography.parquet rename to unquad/data/parquet/mammography/mammography.parquet diff --git a/unquad/utils/data/parquet/musk/musk.parquet b/unquad/data/parquet/musk/musk.parquet similarity index 100% rename from unquad/utils/data/parquet/musk/musk.parquet rename to unquad/data/parquet/musk/musk.parquet diff --git a/unquad/utils/data/parquet/shuttle/shuttle.parquet b/unquad/data/parquet/shuttle/shuttle.parquet similarity index 100% rename from unquad/utils/data/parquet/shuttle/shuttle.parquet rename to unquad/data/parquet/shuttle/shuttle.parquet diff --git a/unquad/utils/data/parquet/thyroid/thyroid.parquet b/unquad/data/parquet/thyroid/thyroid.parquet similarity index 100% rename from unquad/utils/data/parquet/thyroid/thyroid.parquet rename to unquad/data/parquet/thyroid/thyroid.parquet diff --git a/unquad/utils/data/parquet/wbc/wbc.parquet b/unquad/data/parquet/wbc/wbc.parquet similarity index 100% rename from unquad/utils/data/parquet/wbc/wbc.parquet rename to unquad/data/parquet/wbc/wbc.parquet diff --git a/unquad/estimator/configuration.py b/unquad/estimator/configuration.py index beec429..d2b4a96 100644 --- a/unquad/estimator/configuration.py +++ b/unquad/estimator/configuration.py @@ -1,7 +1,7 @@ from dataclasses import dataclass -from unquad.utils.enums.adjustment import Adjustment -from unquad.utils.enums.aggregation import Aggregation +from unquad.utils.enums import Adjustment +from unquad.utils.enums import Aggregation @dataclass @@ -25,7 +25,7 @@ class DetectorConfig: """ alpha: float = 0.2 - adjustment: Adjustment = Adjustment.BENJAMINI_HOCHBERG + adjustment: Adjustment = Adjustment.BH aggregation: Aggregation = Aggregation.MEDIAN seed: int = 1 silent: bool = True diff --git a/unquad/estimator/detector.py b/unquad/estimator/detector.py index 79247e6..e46bee8 100644 --- 
a/unquad/estimator/detector.py +++ b/unquad/estimator/detector.py @@ -13,7 +13,7 @@ from unquad.estimator.parameter import set_params from unquad.strategy.base import BaseStrategy from unquad.utils.aggregation import aggregate -from unquad.utils.decorator.performance import ensure_numpy_array +from unquad.utils.decorator import ensure_numpy_array from unquad.utils.multiplicity import multiplicity_correction from unquad.utils.statistical import calculate_p_val, get_decision diff --git a/unquad/estimator/parameter.py b/unquad/estimator/parameter.py index d63983f..15d5053 100644 --- a/unquad/estimator/parameter.py +++ b/unquad/estimator/parameter.py @@ -7,7 +7,7 @@ from pyod.models.sampling import Sampling from pyod.models.sos import SOS -from unquad.utils.error.forbidden_model import ForbiddenModelError +from unquad.utils.error import ForbiddenModelError forbidden_model_list = [ CBLOF, diff --git a/unquad/utils/aggregation.py b/unquad/utils/aggregation.py index f1b67c1..9df5ad7 100644 --- a/unquad/utils/aggregation.py +++ b/unquad/utils/aggregation.py @@ -1,7 +1,7 @@ import numpy as np -from unquad.utils.decorator.performance import performance_conversion -from unquad.utils.enums.aggregation import Aggregation +from unquad.utils.decorator import performance_conversion +from unquad.utils.enums import Aggregation @performance_conversion("scores") diff --git a/unquad/utils/decorator/performance.py b/unquad/utils/decorator.py similarity index 100% rename from unquad/utils/decorator/performance.py rename to unquad/utils/decorator.py diff --git a/unquad/utils/decorator/__init__.py b/unquad/utils/decorator/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/unquad/utils/enums/__init__.py b/unquad/utils/enums/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/unquad/utils/enums/adjustment.py b/unquad/utils/enums/adjustment.py deleted file mode 100644 index 9679ba8..0000000 --- a/unquad/utils/enums/adjustment.py +++ /dev/null @@ 
-1,21 +0,0 @@ -from enum import Enum - - -class Adjustment(Enum): - """ - Enumerators for adjustment procedures used in False Discovery Rate (FDR) control. - - This enum defines the adjustment methods for controlling the False Discovery Rate - in multiple hypothesis testing. These methods are used to adjust p-values to account - for the multiple comparisons problem, ensuring a controlled false positive rate. - - Attributes: - BENJAMINI_HOCHBERG (str): The Benjamini-Hochberg procedure for FDR control. - BENJAMINI_YEKUTIELI (str): The Benjamini-Yekutieli procedure for FDR control. - NONE (None): No adjustment procedure is applied. - - """ - - BENJAMINI_HOCHBERG: str = "bh" - BENJAMINI_YEKUTIELI: str = "by" - NONE = None diff --git a/unquad/utils/enums/aggregation.py b/unquad/utils/enums/aggregation.py deleted file mode 100644 index 0f4df69..0000000 --- a/unquad/utils/enums/aggregation.py +++ /dev/null @@ -1,23 +0,0 @@ -from enum import Enum - - -class Aggregation(Enum): - """ - Enumerators for aggregation functions used in ensemble methods. - - This enum defines the aggregation methods used to combine predictions or scores - from multiple models in ensemble learning methods. The aggregation function - determines how the final decision or score is calculated from the outputs of - individual models. - - Attributes: - MEAN (str): The mean aggregation function. - MEDIAN (str): The median aggregation function. - MINIMUM (str): The minimum aggregation function. - MAXIMUM (str): The maximum aggregation function. - """ - - MEAN: str = "mean" - MEDIAN: str = "median" - MINIMUM: str = "minimum" - MAXIMUM: str = "maximum" diff --git a/unquad/utils/enums/dataset.py b/unquad/utils/enums/dataset.py deleted file mode 100644 index 66ac2a8..0000000 --- a/unquad/utils/enums/dataset.py +++ /dev/null @@ -1,30 +0,0 @@ -from enum import Enum - - -class Dataset(Enum): - """ - Enumerators for available datasets. 
- - This enum defines the names of datasets that are commonly used in machine learning - and anomaly detection tasks. These datasets represent a variety of real-world - problems, including classification and fraud detection. - - Attributes: - BREAST (str): The breast cancer dataset. - FRAUD (str): The credit card fraud detection dataset. - IONOSPHERE (str): The ionosphere dataset, used for detecting abnormalities. - MAMMOGRAPHY (str): The mammography dataset for detecting abnormalities. - MUSK (str): The musk dataset, used for detecting abnormalities. - SHUTTLE (str): The shuttle dataset, typically used for anomaly detection. - THYROID (str): The thyroid disease detection dataset for detecting abnormalities. - WBC (str): The white blood cell (WBC) dataset used for detecting abnormalities. - """ - - BREAST: str = "breast" - FRAUD: str = "fraud" - IONOSPHERE: str = "ionosphere" - MAMMOGRAPHY: str = "mammography" - MUSK: str = "musk" - SHUTTLE: str = "shuttle" - THYROID: str = "thyroid" - WBC: str = "wbc" diff --git a/unquad/utils/enums/strategy.py b/unquad/utils/enums/strategy.py deleted file mode 100644 index 7abc857..0000000 --- a/unquad/utils/enums/strategy.py +++ /dev/null @@ -1,28 +0,0 @@ -from enum import Enum - - -class Strategy(Enum): - """ - Enumerators for conformal strategies used in anomaly detection. - - This enum defines various conformal strategies that can be applied in anomaly - detection methods. These strategies determine how the model is trained and how - calibration sets are used for conformal prediction. - - Attributes: - SPLIT (str): The Split Conformal strategy (SC). - CV (str): The Cross-Validation Conformal strategy (CV). - CV_PLUS (str): The Cross-Validation Conformal strategy with additional models (CV+). - J (str): The Jackknife Conformal strategy (J). - J_PLUS (str): The Jackknife Conformal strategy with additional models (J+). - JaB (str): The Jackknife-and-Bootstrap Conformal strategy (JaB). 
- JaB_PLUS (str): The Jackknife-and-Bootstrap Conformal strategy with additional models (J+aB). - """ - - SPLIT: str = "SC" - CV: str = "CV" - CV_PLUS: str = "CV+" - J: str = "J" - J_PLUS: str = "J+" - JaB: str = "JaB" - JaB_PLUS: str = "J+aB" diff --git a/unquad/utils/error/forbidden_model.py b/unquad/utils/error.py similarity index 100% rename from unquad/utils/error/forbidden_model.py rename to unquad/utils/error.py diff --git a/unquad/utils/error/__init__.py b/unquad/utils/error/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/unquad/utils/metrics.py b/unquad/utils/metrics.py index ab7f988..d2f3a9d 100644 --- a/unquad/utils/metrics.py +++ b/unquad/utils/metrics.py @@ -1,6 +1,6 @@ import numpy as np -from unquad.utils.decorator.performance import performance_conversion +from unquad.utils.decorator import performance_conversion @performance_conversion("y", "y_hat") diff --git a/unquad/utils/multiplicity.py b/unquad/utils/multiplicity.py index f2dee2c..0e67998 100644 --- a/unquad/utils/multiplicity.py +++ b/unquad/utils/multiplicity.py @@ -2,8 +2,8 @@ from scipy.stats import false_discovery_control -from unquad.utils.decorator.performance import performance_conversion -from unquad.utils.enums.adjustment import Adjustment +from unquad.utils.decorator import performance_conversion +from unquad.utils.enums import Adjustment @performance_conversion("scores") @@ -27,12 +27,8 @@ def multiplicity_correction(method: Adjustment, scores: np.array) -> float: """ aggregation_methods = { Adjustment.NONE: lambda x: x, - Adjustment.BENJAMINI_HOCHBERG: lambda x: false_discovery_control( - x, method="bh" - ), - Adjustment.BENJAMINI_YEKUTIELI: lambda x: false_discovery_control( - x, method="by" - ), + Adjustment.BH: lambda x: false_discovery_control(x, method="bh"), + Adjustment.BY: lambda x: false_discovery_control(x, method="by"), } func = aggregation_methods.get(method) diff --git a/unquad/utils/statistical.py b/unquad/utils/statistical.py index 
6ef4cb5..6c8d52c 100644 --- a/unquad/utils/statistical.py +++ b/unquad/utils/statistical.py @@ -1,6 +1,6 @@ import numpy as np -from unquad.utils.decorator.performance import performance_conversion +from unquad.utils.decorator import performance_conversion @performance_conversion("scores", "calibration_set")