diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 4d81b65..684dfc8 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -13,4 +13,4 @@ env: jobs: testing: - uses: quatrope/scikit-criteria/.github/workflows/tests.yml@0cd9099455b6e45659347dc08e6eeec95f876c10 \ No newline at end of file + uses: quatrope/scikit-criteria/.github/workflows/tests.yml@acc77ab49cec6a541095b1ea46c84e2b16174990 \ No newline at end of file diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f1176d1..328fc58 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,7 +9,7 @@ on: jobs: testing: - uses: quatrope/scikit-criteria/.github/workflows/tests.yml@991bafd28ebc638429e2fcd2c69d7cb9722209ea + uses: quatrope/scikit-criteria/.github/workflows/tests.yml@acc77ab49cec6a541095b1ea46c84e2b16174990 deploy: needs: testing @@ -25,11 +25,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install setuptools wheel twine build - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist + python -m build --sdist --wheel twine upload dist/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c0742e2..fea8770 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - tox_env: [py37, py38, py39, py310] + tox_env: [py38, py39, py310] include: - tox_env: style - tox_env: docstyle diff --git a/CHANGELOG.md b/CHANGELOG.md index 36b171a..78ae61b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,15 +2,75 @@ +## Version 0.8 + +- **New** The `skcriteria.cmp` package utilities to compare rankings. + +- **New** The new package `skcriteria.datasets` include two datasets (one a + toy and one real) to quickly start your experiments. 
+
+- **New** DecisionMatrix now can be sliced with a syntax similar to the
+  pandas.DataFrame.
+  - `dm["c0"]` cut the $c0$ criteria.
+  - `dm[["c0", "c2"]]` cut the criteria $c0$ and $c2$.
+  - `dm.loc["a0"]` cut the alternative $a0$.
+  - `dm.loc[["a0", "a1"]]` cut the alternatives $a0$ and $a1$.
+  - `dm.iloc[0:3]` cuts from the first to the third alternative.
+
+- **New** imputation methods for replacing missing data with substituted
+  values. These methods are in the module `skcriteria.preprocessing.impute`.
+
+- **New** results object now has a `to_series` method.
+
+- **Changed Behaviour**: The ranks and kernels `equals` are now called
+  `values_equals`. The new `aequals` support tolerances to compare
+  numpy arrays internally stored in `extra_`, and the `equals` method is
+  equivalent to `aequals(rtol=0, atol=0)`.
+
+- We detected a bad behavior in ELECTRE2, so we decided to launch a `FutureWarning` when the
+  class is instantiated. In the version after 0.8, a new implementation of ELECTRE2 will be
+  provided.
+
+- Multiple `__repr__` were improved to follow the
+  [Python recommendation](https://docs.python.org/3/library/functions.html#repr)
+
+- `Critic` weighter was renamed to `CRITIC` (all capitals) to be consistent
+  with the literature. The old class is still there but is deprecated.
+
+- All the functions and classes of `skcriteria.preprocessing.distance` were
+  moved to `skcriteria.preprocessing.scalers`.
+
+- The `StdWeighter` now uses the **sample** standard-deviation.
+  From the numerical point of view, this does not generate any change,
+  since the deviations are scaled by the sum. Computationally speaking there
+  may be some difference from the ~5th decimal digit onwards.
+
+- Two methods of the `Objective` enum were deprecated and replaced:
+
+  - `Objective.construct_from_alias()` `->` `Objective.from_alias()` (classmethod)
+  - `Objective.to_string()` `->` `Objective.to_symbol()`
+
+  The deprecated methods will be removed in version *1.0*.
+
+- Add a dominance plot `DecisionMatrix.plot.dominance()`.
+
+- `WeightedSumModel` raises a `ValueError` when some value $< 0$.
+
+- Moved internal modules
+  - `skcriteria.core.methods.SKCTransformerABC` `->`
+    `skcriteria.preprocessing.SKCTransformerABC`
+  - `skcriteria.core.methods.SKCMatrixAndWeightTransformerABC` `->`
+    `skcriteria.preprocessing.SKCMatrixAndWeightTransformerABC`
+
 ## Version 0.7
 
 - **New method**: `ELECTRE2`.
 
-- **New preprocessin strategy:** A new way to transform from minimization to
+- **New preprocessing strategy:** A new way to transform from minimization to
   maximization criteria: `NegateMinimize()` which reverses the sign of the
   values of the criteria to be minimized (useful for not breaking distance
   relations in methods like *TOPSIS*). Additionally the previous we rename the
   `MinimizeToMaximize()` transformer to `InvertMinimize()`.
 
-- Now the `RankingResult`, support repeated/tied rankings and some were
+- Now the `RankingResult`, support repeated/tied rankings and some methods were
   implemented to deal with these cases.
 
   - `RankingResult.has_ties_` to see if there are tied values.
@@ -21,7 +81,7 @@
 
   - `kernel_alternatives_` to know which alternatives are in the kernel.
   - `kernel_size_` to know the number of alternatives in the kernel.
-  - `kernel_where_` was replaced by `kernel_where_` to standardize the api.
+  - `kernelwhere_` was replaced by `kernel_where_` to standardize the api.
## Version 0.6 diff --git a/MANIFEST.in b/MANIFEST.in index 0b06f18..3e2d6a9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,11 +1,13 @@ -include LICENSE +include LICENSE.txt include README.md include CHANGELOG.md recursive-include skcriteria *.py +recursive-include skcriteria *.json exclude tox.ini exclude pyproject.toml +exclude requirements_dev.txt exclude .header-template exclude .readthedocs.yml diff --git a/README.md b/README.md index 821dc5c..37c01e4 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ [![Conda Forge](https://anaconda.org/conda-forge/scikit-criteria/badges/version.svg)](https://anaconda.org/conda-forge/scikit-criteria) ![Conda](https://img.shields.io/conda/dn/conda-forge/scikit-criteria?label=Conda-Forge%20downloads) [![License](https://img.shields.io/pypi/l/uttrs?color=blue)](https://www.tldrlegal.com/l/bsd3) -[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://badge.fury.io/py/uttrs) +[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://badge.fury.io/py/uttrs) diff --git a/docs/requirements.txt b/docs/requirements.txt index 295b167..071c7de 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -11,4 +11,4 @@ sphinxcontrib-bibtex sphinx-nested-apidoc pybtex-apa-style -m2r \ No newline at end of file +m2r2 \ No newline at end of file diff --git a/docs/source/_dynamic/CHANGELOG.rst b/docs/source/_dynamic/CHANGELOG.rst deleted file mode 100644 index 141db55..0000000 --- a/docs/source/_dynamic/CHANGELOG.rst +++ /dev/null @@ -1,79 +0,0 @@ -.. FILE AUTO GENERATED !! - -Version 0.7 ------------ - - -* **New method**\ : ``ELECTRE2``. -* **New preprocessin strategy:** A new way to transform from minimization to - maximization criteria: ``NegateMinimize()`` which reverses the sign of the - values of the criteria to be minimized (useful for not breaking distance - relations in methods like *TOPSIS*\ ). 
Additionally the previous we rename the - ``MinimizeToMaximize()`` transformer to ``InvertMinimize()``. -* - Now the ``RankingResult``\ , support repeated/tied rankings and some were - implemented to deal with these cases. - - - * ``RankingResult.has_ties_`` to see if there are tied values. - * ``RankingResult.ties_`` to see how often values are repeated. - * ``RankingResult.untided_rank_`` to get a ranking with no repeated values. - repeated values. - -* - ``KernelResult`` now implements several new properties: - - - * ``kernel_alternatives_`` to know which alternatives are in the kernel. - * ``kernel_size_`` to know the number of alternatives in the kernel. - * ``kernel_where_`` was replaced by ``kernel_where_`` to standardize the api. - -Version 0.6 ------------ - - -* Support for Python 3.10. -* All the objects of the project are now immutable by design, and can only - be mutated troughs the ``object.copy()`` method. -* Dominance analysis tools (\ ``DecisionMatrix.dominance``\ ). -* The method ``DecisionMatrix.describe()`` was deprecated and will be removed - in version *1.0*. -* New statistics functionalities ``DecisionMatrix.stats`` accessor. -* - The accessors are now cached in the ``DecisionMatrix``. - -* - Tutorial for dominance and satisfaction analysis. - -* - TOPSIS now support hyper-parameters to select different metrics. - -* Generalize the idea of accessors in scikit-criteria througth a common - framework (\ ``skcriteria.utils.accabc`` module). -* New deprecation mechanism through the -* ``skcriteria.utils.decorators.deprecated`` decorator. - -Version 0.5 ------------ - -In this version scikit-criteria was rewritten from scratch. Among other things: - - -* The model implementation API was simplified. -* The ``Data`` object was removed in favor of ``DecisionMatrix`` which implements many more useful features for MCDA. -* Plots were completely re-implemented using `Seaborn `_. -* Coverage was increased to 100%. 
-* Pipelines concept was added (Thanks to `Scikit-learn `_\ ). -* New documentation. The quick start is totally rewritten! - -**Full Changelog**\ : https://github.com/quatrope/scikit-criteria/commits/0.5 - -Version 0.2 ------------ - -First OO stable version. - -Version 0.1 ------------ - -Only functions. diff --git a/docs/source/_static/css/skcriteria.css b/docs/source/_static/css/skcriteria.css index fc056f1..614ab8d 100644 --- a/docs/source/_static/css/skcriteria.css +++ b/docs/source/_static/css/skcriteria.css @@ -9,6 +9,8 @@ --links: #7b8e2d; /* Background color of jupyter input cell */ --jupyter-in-cell-bg: #e3ffd28c; + /* Deprecation color */ + --deprecated: #ff7474; } @@ -40,6 +42,17 @@ a:hover code { color: var(--primary) !important; } +span.deprecated { + color: var(--deprecated) !important; +} + + +/* remove the title of the first page */ + +section#scikit-criteria-documentation>h1:nth-child(1) { + display: none; +} + /* NAVBAR */ diff --git a/docs/source/api/cmp/index.rst b/docs/source/api/cmp/index.rst new file mode 100644 index 0000000..8d73ab4 --- /dev/null +++ b/docs/source/api/cmp/index.rst @@ -0,0 +1,14 @@ +``skcriteria.cmp`` package +============================ + +.. automodule:: skcriteria.cmp + :members: + :undoc-members: + :show-inheritance: + :ignore-module-all: + +.. toctree:: + :maxdepth: 2 + :glob: + + * \ No newline at end of file diff --git a/docs/source/api/cmp/ranks_cmp.rst b/docs/source/api/cmp/ranks_cmp.rst new file mode 100644 index 0000000..104bf98 --- /dev/null +++ b/docs/source/api/cmp/ranks_cmp.rst @@ -0,0 +1,7 @@ +``skcriteria.cmp.ranks_cmp`` module +=================================== + +.. 
automodule:: skcriteria.cmp.ranks_cmp + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/core/objectives.rst b/docs/source/api/core/objectives.rst new file mode 100644 index 0000000..a29bfa2 --- /dev/null +++ b/docs/source/api/core/objectives.rst @@ -0,0 +1,7 @@ +``skcriteria.core.objectives`` module +===================================== + +.. automodule:: skcriteria.core.objectives + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/datasets/index.rst b/docs/source/api/datasets/index.rst new file mode 100644 index 0000000..618393a --- /dev/null +++ b/docs/source/api/datasets/index.rst @@ -0,0 +1,14 @@ +``skcriteria.datasets`` package +=============================== + +.. automodule:: skcriteria.datasets + :members: + :undoc-members: + :show-inheritance: + :ignore-module-all: + +.. .. toctree:: +.. :maxdepth: 2 +.. :glob: + +.. * \ No newline at end of file diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 06a96c4..f5a7fff 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -26,6 +26,18 @@ preprocessing/index +.. toctree:: + :maxdepth: 2 + + cmp/index + + +.. toctree:: + :maxdepth: 2 + + datasets/index + + .. toctree:: :maxdepth: 2 diff --git a/docs/source/api/madm/_base.rst b/docs/source/api/madm/_base.rst deleted file mode 100644 index 93cd101..0000000 --- a/docs/source/api/madm/_base.rst +++ /dev/null @@ -1,7 +0,0 @@ -``skcriteria.madm._base`` module -================================== - -.. automodule:: skcriteria.madm._base - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/madm/_madm_base.rst b/docs/source/api/madm/_madm_base.rst new file mode 100644 index 0000000..eb59834 --- /dev/null +++ b/docs/source/api/madm/_madm_base.rst @@ -0,0 +1,7 @@ +``skcriteria.madm._madm_base`` module +===================================== + +.. 
automodule:: skcriteria.madm._madm_base + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/preprocessing/_preprocessing_base.rst b/docs/source/api/preprocessing/_preprocessing_base.rst new file mode 100644 index 0000000..2088654 --- /dev/null +++ b/docs/source/api/preprocessing/_preprocessing_base.rst @@ -0,0 +1,7 @@ +``skcriteria.preprocessing._preprocessing_base`` module +======================================================= + +.. automodule:: skcriteria.preprocessing._preprocessing_base + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/preprocessing/distance.rst b/docs/source/api/preprocessing/distance.rst index a359697..a3cbedf 100644 --- a/docs/source/api/preprocessing/distance.rst +++ b/docs/source/api/preprocessing/distance.rst @@ -1,6 +1,9 @@ ``skcriteria.preprocessing.distance`` module ============================================ +.. warning:: + This module is deprecated. + .. automodule:: skcriteria.preprocessing.distance :members: :undoc-members: diff --git a/docs/source/api/preprocessing/impute.rst b/docs/source/api/preprocessing/impute.rst new file mode 100644 index 0000000..4ce3db7 --- /dev/null +++ b/docs/source/api/preprocessing/impute.rst @@ -0,0 +1,7 @@ +``skcriteria.preprocessing.impute`` module +=========================================== + +.. automodule:: skcriteria.preprocessing.impute + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/utils/decorators.rst b/docs/source/api/utils/cmanagers.rst similarity index 53% rename from docs/source/api/utils/decorators.rst rename to docs/source/api/utils/cmanagers.rst index 390ed5c..10a605e 100644 --- a/docs/source/api/utils/decorators.rst +++ b/docs/source/api/utils/cmanagers.rst @@ -1,7 +1,7 @@ -``skcriteria.utils.decorators`` module +``skcriteria.utils.cmanagers`` module ====================================== -.. automodule:: skcriteria.utils.decorators +.. 
automodule:: skcriteria.utils.cmanagers :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/utils/deprecate.rst b/docs/source/api/utils/deprecate.rst new file mode 100644 index 0000000..b59cc6f --- /dev/null +++ b/docs/source/api/utils/deprecate.rst @@ -0,0 +1,7 @@ +``skcriteria.utils.deprecate`` module +====================================== + +.. automodule:: skcriteria.utils.deprecate + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/utils/doctools.rst b/docs/source/api/utils/doctools.rst new file mode 100644 index 0000000..2936bb9 --- /dev/null +++ b/docs/source/api/utils/doctools.rst @@ -0,0 +1,7 @@ +``skcriteria.utils.doctools`` module +====================================== + +.. automodule:: skcriteria.utils.doctools + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/utils/unames.rst b/docs/source/api/utils/unames.rst new file mode 100644 index 0000000..312f0c2 --- /dev/null +++ b/docs/source/api/utils/unames.rst @@ -0,0 +1,7 @@ +``skcriteria.utils.unames`` module +===================================== + +.. automodule:: skcriteria.utils.unames + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py index 55889c6..fcc0fc6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -79,7 +79,7 @@ numpydoc_class_members_toctree = False -nbsphinx_execute = "always" +nbsphinx_execute = "never" nbsphinx_allow_errors = True @@ -97,7 +97,9 @@ # General information about the project. project = skcriteria.NAME -copyright = "2016-2022, Juan B. Cabral - Nadia A. Luczywo" +copyright = "2016-2021, Juan B. Cabral - Nadia A. Luczywo - Copyright (c) 2022, QuatroPe" + + author = "Juan BC" # The version info for the project you're documenting, acts as replacement for @@ -114,7 +116,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
-language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -237,10 +239,10 @@ # ============================================================================= -# INJECT REAMDE INTO THE RESTRUCTURED TEXT +# INJECT README INTO THE RESTRUCTURED TEXT # ============================================================================= -import m2r +import m2r2 DYNAMIC_RST = { "README.md": "README.rst", @@ -256,7 +258,7 @@ with open(rst_path, "w") as fp: fp.write(".. FILE AUTO GENERATED !! \n") - fp.write(m2r.convert(readme_md)) + fp.write(m2r2.convert(readme_md)) print(f"{md_path} -> {rst_path} regenerated!") diff --git a/docs/source/index.rst b/docs/source/index.rst index 78520c2..0a180e6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,6 +3,9 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. +Scikit-Criteria Documentation +============================= + .. 
image:: _static/logo_medium.png :align: center :scale: 100 % diff --git a/docs/source/refs.bib b/docs/source/refs.bib index f0349de..ec27beb 100644 --- a/docs/source/refs.bib +++ b/docs/source/refs.bib @@ -161,4 +161,65 @@ @article{simon1955behavioral pages = {99--118}, year = {1955}, publisher = {MIT Press} +} + +% dominance + +@misc{enwiki:1107297090, + author = {{Wikipedia contributors}}, + title = {Pareto front --- {Wikipedia}{,} The Free Encyclopedia}, + year = {2022}, + howpublished = {\url{https://en.wikipedia.org/w/index.php?title=Pareto_front&oldid=1107297090}}, + note = {[Online; accessed 9-October-2022]} +} + + @misc{enwiki:1110412520, + author = {{Wikipedia contributors}}, + title = {Pareto efficiency --- {Wikipedia}{,} The Free Encyclopedia}, + year = {2022}, + howpublished = {\url{https://en.wikipedia.org/w/index.php?title=Pareto_efficiency&oldid=1110412520}}, + note = {[Online; accessed 9-October-2022]} +} + +% UTILS + +@misc{enwiki:1114075000, + author = {{Wikipedia contributors}}, + title = {Singleton pattern --- {Wikipedia}{,} The Free Encyclopedia}, + year = {2022}, + url = {https://en.wikipedia.org/w/index.php?title=Singleton_pattern&oldid=1114075000}, + note = {[Online; accessed 12-October-2022]} +} + + @book{gamma1995design, + title = {Design patterns: elements of reusable object-oriented software}, + author = {Gamma, Erich and Helm, Richard and Johnson, Ralph and Johnson, Ralph E and Vlissides, John and others}, + year = {1995}, + publisher = {Pearson Deutschland GmbH} +} + + +% DATASETS + +@article{van2021evaluation, + title = {Evaluation of the importance of criteria for the selection of cryptocurrencies}, + author = {Van Heerden, Natalia A and Cabral, Juan B and Luczywo, Nadia}, + journal = {arXiv preprint arXiv:2109.00130}, + year = {2021} +} + +@inproceedings{van2021epio_evaluation, + title = {Evaluaci{\'o}n de la importancia de criterios para la selecci{\'o}n de criptomonedas}, + author = {Van Heerden, Natalia A and Cabral, Juan B and 
Luczywo, Nadia}, + booktitle = {XXXIV ENDIO - XXXII EPIO Virtual 2021.}, + year = {2021} +} + +@misc{rajkumar_2021, + title = {Cryptocurrency historical prices}, + url = {https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory}, + journal = {Kaggle}, + author = {Rajkumar, Sudalai}, + year = {2021}, + month = {Jul} } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3962b27..fa19155 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,3 +10,67 @@ markers = [ testpaths = [ "tests", ] + +# ============================================================================= +# PACKAGING +# ============================================================================= + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "scikit-criteria" +version = "0.8.rc1" +authors = [{name = "Juan B Cabral & QuatroPe", email = "jbcabral@unc.edu.ar"}] +readme = "README.md" +license = {file = "LICENSE.txt"} +description = "Scikit-Criteria is a collections of algorithms, methods and techniques for multiple-criteria decision analysis." 
+keywords = [ + "muticriteria", + "mcda", + "mcdm", + "weightedsum", + "weightedproduct", + "simus", + "topsis", + "moora", + "electre", + "critic", + "entropy", + "dominance", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Scientific/Engineering", +] +urls = { Homepage = "https://scikit-criteria.quatrope.org/", Repository = "https://github.com/quatrope/scikit-criteria" } +dependencies = [ + "numpy", + "pandas", + "scipy", + "jinja2", + "custom_inherit", + "seaborn", + "pulp", + "Deprecated", + "scikit-learn", + "matplotlib!=3.6.1", +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +include = ["skcriteria", "skcriteria.*"] +namespaces = false diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..c1f9f63 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,18 @@ +tox +ipdb +pytest +pytest-ordering +pyquery + +flake8 +flake8-import-order +flake8-black +flake8-builtins + +coverage +pytest-cov + +pydocstyle +toml + +https://github.com/quatrope/qafan/archive/refs/heads/master.zip \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 7aefd3d..0000000 --- a/setup.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) -# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia -# Copyright (c) 2022, QuatroPe -# All rights reserved. 
- -# ============================================================================= -# DOCS -# ============================================================================= - -"""This file is for distribute scikit-criteria - -""" - - -# ============================================================================= -# IMPORTS -# ============================================================================= - - -import os -import pathlib - -from setuptools import find_packages, setup - -os.environ["__SKCRITERIA_IN_SETUP__"] = "True" -import skcriteria # noqa - -# ============================================================================= -# CONSTANTS -# ============================================================================= - -REQUIREMENTS = [ - "numpy", - "pandas", - "pyquery", - "scipy", - "jinja2", - "custom_inherit", - "seaborn", - "pulp", - "Deprecated", -] - -PATH = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) - -with open(PATH / "README.md") as fp: - LONG_DESCRIPTION = fp.read() - - -# ============================================================================= -# FUNCTIONS -# ============================================================================= - - -def do_setup(): - setup( - name="scikit-criteria", - version=skcriteria.VERSION, - description=skcriteria.DOC, - long_description=LONG_DESCRIPTION, - long_description_content_type="text/markdown", - author="QuatroPe", - author_email="jbcabral@unc.edu.ar", - url="http://scikit-criteria.org/", - license="3 Clause BSD", - keywords=[ - "muticriteria", - "mcda", - "mcdm", - "weightedsum", - "weightedproduct", - "simus", - "topsis", - "moora", - "electre", - "critic", - "entropy", - "dominance", - ], - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - 
"Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Scientific/Engineering", - ], - packages=[ - pkg for pkg in find_packages() if pkg.startswith("skcriteria") - ], - install_requires=REQUIREMENTS, - ) - - -if __name__ == "__main__": - do_setup() diff --git a/skcriteria/__init__.py b/skcriteria/__init__.py index 6d04dae..e6350b9 100644 --- a/skcriteria/__init__.py +++ b/skcriteria/__init__.py @@ -16,24 +16,26 @@ # IMPORTS # ============================================================================= -import os +import importlib.metadata -if os.getenv("__SKCRITERIA_IN_SETUP__") != "True": - from .core import DecisionMatrix, Objective, mkdm - -del os +from . import datasets +from .core import DecisionMatrix, Objective, mkdm # ============================================================================= # CONSTANTS # ============================================================================= -__all__ = ["mkdm", "DecisionMatrix", "Objective"] +__all__ = ["mkdm", "DecisionMatrix", "Objective", "datasets"] -__version__ = ("0", "7") NAME = "scikit-criteria" DOC = __doc__ -VERSION = ".".join(__version__) +VERSION = importlib.metadata.version(NAME) + +__version__ = tuple(VERSION.split(".")) + + +del importlib diff --git a/skcriteria/cmp/__init__.py b/skcriteria/cmp/__init__.py new file mode 100644 index 0000000..5996aef --- /dev/null +++ b/skcriteria/cmp/__init__.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. 
+ +# ============================================================================= +# DOCS +# ============================================================================= + +"""Utilities for a-posteriori analysis of experiments.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +from .ranks_cmp import RanksComparator, mkrank_cmp + +# ============================================================================= +# ALL +# ============================================================================= + +__all__ = [ + "RanksComparator", + "mkrank_cmp", +] diff --git a/skcriteria/cmp/ranks_cmp.py b/skcriteria/cmp/ranks_cmp.py new file mode 100644 index 0000000..fa52e2d --- /dev/null +++ b/skcriteria/cmp/ranks_cmp.py @@ -0,0 +1,780 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. 
+ +# ============================================================================= +# DOCS +# ============================================================================= + +"""Ranking comparison routines.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +import functools +import itertools as it +from collections import defaultdict + +import matplotlib.pyplot as plt + +import pandas as pd + +from scipy.spatial import distance + +import seaborn as sns + +from sklearn import metrics as _skl_metrics + +from ..core import SKCMethodABC +from ..madm import RankResult +from ..utils import AccessorABC, Bunch, unique_names + + +# ============================================================================= +# CONSTANTS +# ============================================================================= + +RANKS_LABELS = { + True: "Untied ranks (lower is better)", + False: "Ranks (lower is better)", +} + + +# ============================================================================= +# COMPARATOR +# ============================================================================= + + +class RanksComparator(SKCMethodABC): + """Rankings comparator object. + + This class is intended to contain a collection of rankings on which you + want to do comparative analysis. + + All rankings must have exactly the same alternatives, although their order + may vary. + + All methods support the ``untied`` parameter, which serves to untie + rankings in case there are results that can assign more than one + alternative to the same position (e.g.``ELECTRE2``). + + Parameters + ---------- + ranks : list + List of (name, ranking) tuples of ``skcriteria.madm.RankResult`` + with the same alternatives. + + See Also + -------- + skcriteria.cmp.mkrank_cmp : Convenience function for simplified + ranks comparator construction. 
+ + """ + + _skcriteria_dm_type = "ranks_comparator" + _skcriteria_parameters = ["ranks"] + + def __init__(self, ranks): + ranks = list(ranks) + self._validate_ranks(ranks) + self._ranks = ranks + + # INTERNALS =============================================================== + def _validate_ranks(self, ranks): + + if len(ranks) <= 1: + raise ValueError("Please provide more than one ranking") + + used_names = set() + first_alternatives = set(ranks[0][1].alternatives) + for name, part in ranks: + + if not isinstance(name, str): + raise ValueError("'name' must be instance of str") + + if not isinstance(part, RankResult): + raise TypeError("ranks must be instance of madm.RankResult") + + if name in used_names: + raise ValueError(f"Duplicated name {name!r}") + used_names.add(name) + + diff = first_alternatives.symmetric_difference(part.alternatives) + if diff: + miss_str = ", ".join(diff) + raise ValueError( + f"Some ranks miss the alternative/s: {miss_str!r}" + ) + + # PROPERTIES ============================================================== + @property + def ranks(self): + """List of ranks in the comparator.""" + return list(self._ranks) + + @property + def named_ranks(self): + """Dictionary-like object, with the following attributes. + + Read-only attribute to access any rank parameter by user given name. + Keys are ranks names and values are rannks parameters. + + """ + return Bunch("ranks", dict(self.ranks)) + + # MAGIC! ================================================================== + + def __repr__(self): + """x.__repr__() <==> repr(x).""" + cls_name = type(self).__name__ + ranks_names = [rn for rn, _ in self._ranks] + return f"<{cls_name} [ranks={ranks_names!r}]>" + + def __len__(self): + """Return the number of rankings to compare.""" + return len(self._ranks) + + def __getitem__(self, ind): + """Return a sub-comparator or a single ranking in the pipeline. 
+ + Indexing with an integer will return an ranking; using a slice + returns another RankComparator instance which copies a slice of this + RankComparator. This copy is shallow: modifying ranks in the + sub-comparator will affect the larger pipeline and vice-versa. + However, replacing a value in `step` will not affect a copy. + + """ + if isinstance(ind, slice): + if ind.step not in (1, None): + cname = type(self).__name__ + raise ValueError(f"{cname} slicing only supports a step of 1") + return self.__class__(self.ranks[ind]) + elif isinstance(ind, int): + return self._ranks[ind][-1] + elif isinstance(ind, str): + return self.named_ranks[ind] + raise KeyError(ind) + + def __hash__(self): + """x.__hash__() <==> hash(x).""" + return id(self) + + # TO DATA ================================================================= + + def to_dataframe(self, *, untied=False): + """Convert the entire RanksComparator into a dataframe. + + The alternatives are the rows, and the different rankings are the + columns. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + + Returns + ------- + :py:class:`pd.DataFrame` + A RanksComparator as pandas DataFrame. + + """ + columns = { + rank_name: rank.to_series(untied=untied) + for rank_name, rank in self._ranks + } + + df = pd.DataFrame.from_dict(columns) + df.columns.name = "Method" + + return df + + def corr(self, *, untied=False, **kwargs): + """Compute pairwise correlation of rankings, excluding NA/null values. + + By default the pearson correlation coefficient is used. + + Please check the full documentation of a ``pandas.DataFrame.corr()`` + method for details about the implementation. 
+ + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.DataFrame.corr()`` method. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the correlation between rankings. + + """ + return self.to_dataframe(untied=untied).corr(**kwargs) + + def cov(self, *, untied=False, **kwargs): + """Compute pairwise covariance of rankings, excluding NA/null values. + + Please check the full documentation of a ``pandas.DataFrame.cov()`` + method for details about the implementation. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.DataFrame.cov()`` method. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the covariance between rankings. + + """ + return self.to_dataframe(untied=untied).cov(**kwargs) + + def r2_score(self, *, untied=False, **kwargs): + """Compute pairwise coefficient of determination regression score \ + function of rankings, excluding NA/null values. + + Best possible score is 1.0 and it can be negative (because the + model can be arbitrarily worse). + + Please check the full documentation of a ``sklearn.metrics.r2_score`` + function for details about the implementation and the behaviour. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. 
On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``sklearn.metrics.r2_score()`` function. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the coefficient of determination between rankings. + + """ + df = self.to_dataframe(untied=untied) + # here we are going to create a dict of dict + rows = defaultdict(dict) + + # combine the methods pairwise + for r0, r1 in it.combinations(df.columns, 2): + r2_score = _skl_metrics.r2_score(df[r0], df[r1], **kwargs) + + # add the metrics in both directions + rows[r0][r1] = r2_score + rows[r1][r0] = r2_score + + # create the dataframe and change the nan for 1 (perfect R2) + r2_df = pd.DataFrame.from_dict(rows).fillna(1) + r2_df = r2_df[df.columns].loc[df.columns] + + r2_df.index.name = "Method" + r2_df.columns.name = "Method" + + return r2_df + + def distance(self, *, untied=False, metric="hamming", **kwargs): + """Compute pairwise distance between rankings. + + By default the 'hamming' distance is used, which is simply the + proportion of disagreeing components in Two rankings. + + Please check the full documentation of a + ``scipy.spatial.distance.pdist`` function for details about the + implementation and the behaviour. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + metric: str or function, default ``"hamming"`` + The distance metric to use. The distance function can + be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', + 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', + 'jaccard', 'jensenshannon', 'kulczynski1', + 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', + 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', + 'sqeuclidean', 'yule'. 
+ kwargs: + Other keyword arguments are passed to the + ``scipy.spatial.distance.pdist()`` function. + + Returns + ------- + :py:class:`pd.DataFrame` + A DataFrame with the distance between rankings. + + """ + df = self.to_dataframe(untied=untied).T + dis_array = distance.pdist(df, metric=metric, **kwargs) + dis_mtx = distance.squareform(dis_array) + dis_df = pd.DataFrame( + dis_mtx, columns=df.index.copy(), index=df.index.copy() + ) + return dis_df + + # ACCESSORS (YES, WE USE CACHED PROPERTIES IS THE EASIEST WAY) ============ + + @property + @functools.lru_cache(maxsize=None) + def plot(self): + """Plot accessor.""" + return RanksComparatorPlotter(self) + + +# ============================================================================= +# PLOTTER +# ============================================================================= + + +class RanksComparatorPlotter(AccessorABC): + """RanksComparator plot utilities. + + Kind of plot to produce: + + - 'flow' : Changes in the rankings of the alternatives as flow lines + (default) + - 'reg' : Pairwise rankings data and a linear regression model fit plot. + - 'heatmap' : Rankings as a color-encoded matrix. + - 'corr' : Pairwise correlation of rankings as a color-encoded matrix. + - 'cov' : Pairwise covariance of rankings as a color-encoded matrix. + - 'r2_score' : Pairwise coefficient of determination regression score \ + function of rankings as a color-encoded matrix. + - 'distance' : Pairwise distance between rankings as a color-encoded \ + matrix. + - 'box' : Box-plot of rankings with respect to alternatives + - 'bar' : Ranking of alternatives by method with vertical bars. + - 'barh' : Ranking of alternatives by method with horizontal bars. + + """ + + _default_kind = "flow" + + def __init__(self, ranks_cmp): + self._ranks_cmp = ranks_cmp + + # MANUAL MADE PLOT ======================================================== + # These plots have a much more manually orchestrated code. 
+ + def flow(self, *, untied=False, grid_kws=None, **kwargs): + """Represents changes in the rankings of the alternatives as lines \ + flowing through the ranking-methods. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + grid_kws: dict or None + Dict with keyword arguments passed to + ``matplotlib.axes.plt.Axes.grid`` + kwargs: + Other keyword arguments are passed to the ``seaborn.lineplot()`` + function. except for data, estimator and sort. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + df = self._ranks_cmp.to_dataframe(untied=untied) + + ax = sns.lineplot(data=df.T, estimator=None, sort=False, **kwargs) + + grid_kws = {} if grid_kws is None else grid_kws + grid_kws.setdefault("alpha", 0.3) + ax.grid(**grid_kws) + + ax.set_ylabel(RANKS_LABELS[untied]) + + return ax + + def reg( + self, + *, + untied=False, + r2=True, + palette=None, + legend=True, + r2_fmt=".2g", + r2_kws=None, + **kwargs, + ): + """Plot a pairwise rankings data and a linear regression model fit. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + r2 : bool, default ``True`` + If True, the coefficient of determination results are added to the + regression legend. + palette: matplotlib/seaborn color palette, default ``None`` + Set of colors for mapping the hue variable. + legend: bool, default ``True`` + If False, suppress the legend for semantic variables. + r2_fmt: str, default ``"2.g"`` + String formatting code to use when adding the coefficient of + determination. 
+ r2_kws: dict or None + Dict with keywords arguments passed to + ``sklearn.metrics.r2_score()`` function. + kwargs: + Other keyword arguments are passed to the ``seaborn.lineplot()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + df = self._ranks_cmp.to_dataframe(untied=untied) + + # Just to ensure that no manual color reaches regplot + if "color" in kwargs: + cls_name = type(self).__name__ + raise TypeError( + f"{cls_name}.reg() got an unexpected keyword argument 'color'" + ) + + # if there is a custom axis, we take it out + ax = kwargs.pop("ax", None) + + # r2 + if legend and r2: + r2_kws = {} if r2_kws is None else r2_kws + r2_df = self._ranks_cmp.r2_score(untied=untied, **r2_kws) + + # we create the infinite cycle of colors for the palette, + # so we take out as we need + colors = it.cycle(sns.color_palette(palette=palette)) + + # pairwise ranks iteration + for x, y in it.combinations(df.columns, 2): + color = next(colors) + + # The r2 correlation index + r2_label = "" + if legend and r2: + r2_score = format(r2_df[x][y], r2_fmt) + r2_label = f" - $R^2={r2_score}$" + + label = "x={x}, y={y}{r2}".format(x=x, y=y, r2=r2_label) + ax = sns.regplot( + x=x, y=y, data=df, ax=ax, label=label, color=color, **kwargs + ) + + ranks_label = RANKS_LABELS[untied] + ax.set(xlabel=f"'x' {ranks_label}", ylabel=f"'y' {ranks_label}") + + if legend: + ax.legend() + + return ax + + # SEABORN BASED =========================================================== + # Thin wrapper around seaborn plots + + def heatmap(self, *, untied=False, **kwargs): + """Plot the rankings as a color-encoded matrix. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. 
+ kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + df = self._ranks_cmp.to_dataframe(untied=untied) + kwargs.setdefault("annot", True) + kwargs.setdefault("cbar_kws", {"label": RANKS_LABELS[untied]}) + return sns.heatmap(data=df, **kwargs) + + def corr(self, *, untied=False, corr_kws=None, **kwargs): + """Plot the pairwise correlation of rankings as a color-encoded matrix. + + By default the pearson correlation coefficient is used. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + corr_kws: dict or None + Dict with keywords arguments passed the + ``pandas.DataFrame.corr()`` method. + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + corr_kws = {} if corr_kws is None else corr_kws + corr = self._ranks_cmp.corr(untied=untied, **corr_kws) + + kwargs.setdefault("annot", True) + kwargs.setdefault("cbar_kws", {"label": "Correlation"}) + return sns.heatmap(data=corr, **kwargs) + + def cov(self, *, untied=False, cov_kws=None, **kwargs): + """Plot the pairwise covariance of rankings as a color-encoded matrix. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + cov_kws: dict or None + Dict with keywords arguments passed the + ``pandas.DataFrame.cov()`` method. + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. 
+ + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + cov_kws = {} if cov_kws is None else cov_kws + cov = self._ranks_cmp.cov(untied=untied, **cov_kws) + + kwargs.setdefault("annot", True) + kwargs.setdefault("cbar_kws", {"label": "Covariance"}) + return sns.heatmap(data=cov, **kwargs) + + def r2_score(self, untied=False, r2_kws=None, **kwargs): + """Plot the pairwise coefficient of determination regression score \ + function of rankings as a color-encoded matrix. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + cov_kws: dict or None + Dict with keywords arguments passed the + ``pandas.DataFrame.cov()`` method. + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + r2_kws = {} if r2_kws is None else r2_kws + r2 = self._ranks_cmp.r2_score(untied=untied, **r2_kws) + + kwargs.setdefault("annot", True) + kwargs.setdefault("cbar_kws", {"label": "$R^2$"}) + return sns.heatmap(data=r2, **kwargs) + + def distance( + self, *, untied=False, metric="hamming", distance_kws=None, **kwargs + ): + """Plot the pairwise distance between rankings as a color-encoded \ + matrix. + + By default the 'hamming' distance is used, which is simply the + proportion of disagreeing components in Two rankings. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + metric: str or function, default ``"hamming"`` + The distance metric to use. 
The distance function can + be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', + 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', + 'jaccard', 'jensenshannon', 'kulczynski1', + 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', + 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', + 'sqeuclidean', 'yule'. + distance_kws: dict or None + Dict with keywords arguments passed the + ``scipy.spatial.distance.pdist`` function + kwargs: + Other keyword arguments are passed to the ``seaborn.heatmap()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + distance_kws = {} if distance_kws is None else distance_kws + dis = self._ranks_cmp.distance( + untied=untied, metric=metric, **distance_kws + ) + + kwargs.setdefault("annot", True) + kwargs.setdefault( + "cbar_kws", {"label": f"{metric} distance".capitalize()} + ) + return sns.heatmap(data=dis, **kwargs) + + def box(self, *, untied=False, **kwargs): + """Draw a boxplot to show rankings with respect to alternatives. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the ``seaborn.boxplot()`` + function. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + df = self._ranks_cmp.to_dataframe(untied=untied) + ax = sns.boxplot(data=df.T, **kwargs) + + ranks_label = RANKS_LABELS[untied] + if kwargs.get("orient") in (None, "v"): + ax.set_ylabel(ranks_label) + else: + ax.set_xlabel(ranks_label) + + return ax + + # DATAFRAME BASED ======================================================== + # Thin wrapper around pandas.DataFrame.plot + + def bar(self, *, untied=False, **kwargs): + """Draw plot that presents ranking of alternatives by method with \ + vertical bars. 
+ + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.Dataframe.plot.bar()`` method. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + df = self._ranks_cmp.to_dataframe(untied=untied) + kwargs["ax"] = kwargs.get("ax") or plt.gca() + ax = df.plot.bar(**kwargs) + ax.set_ylabel(RANKS_LABELS[untied]) + return ax + + def barh(self, *, untied=False, **kwargs): + """Draw plot that presents ranking of alternatives by method with \ + horizontal bars. + + Parameters + ---------- + untied: bool, default ``False`` + If it is ``True`` and any ranking has ties, the + ``RankResult.untied_rank_`` property is used to assign each + alternative a single ranked order. On the other hand, if it is + ``False`` the rankings are used as they are. + kwargs: + Other keyword arguments are passed to the + ``pandas.Dataframe.plot.barh()`` method. + + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + df = self._ranks_cmp.to_dataframe(untied=untied) + kwargs["ax"] = kwargs.get("ax") or plt.gca() + ax = df.plot.barh(**kwargs) + ax.set_xlabel(RANKS_LABELS[untied]) + return ax + + +# ============================================================================= +# FACTORY +# ============================================================================= + + +def mkrank_cmp(*ranks): + """Construct a RankComparator from the given rankings. + + This is a shorthand for the RankComparator constructor; it does not + require, and does not permit, naming the estimators. Instead, their names + will be set to the method attribute of the rankings automatically. 
+ + Parameters + ---------- + *ranks: list of RankResult objects + List of the scikit-criteria RankResult objcects. + + Returns + ------- + rcmp : RanksComparator + Returns a scikit-criteria :class:`RanksComparator` object. + + """ + names = [r.method for r in ranks] + named_ranks = unique_names(names=names, elements=ranks) + return RanksComparator(named_ranks) diff --git a/skcriteria/core/__init__.py b/skcriteria/core/__init__.py index 0716b0c..fe520b2 100644 --- a/skcriteria/core/__init__.py +++ b/skcriteria/core/__init__.py @@ -15,16 +15,9 @@ # IMPORTS # ============================================================================= -from .data import ( - DecisionMatrix, - Objective, - mkdm, -) -from .methods import ( - SKCMatrixAndWeightTransformerABC, - SKCMethodABC, - SKCTransformerABC, -) +from .data import DecisionMatrix, mkdm +from .methods import SKCMethodABC +from .objectives import Objective from .plot import DecisionMatrixPlotter # ============================================================================= @@ -36,7 +29,5 @@ "DecisionMatrix", "DecisionMatrixPlotter", "Objective", - "SKCMatrixAndWeightTransformerABC", "SKCMethodABC", - "SKCTransformerABC", ] diff --git a/skcriteria/core/data.py b/skcriteria/core/data.py index adec9fd..cc4bc5e 100644 --- a/skcriteria/core/data.py +++ b/skcriteria/core/data.py @@ -12,7 +12,7 @@ """Data abstraction layer. This module defines the DecisionMatrix object, which internally encompasses -the alternative matrix, weights and objectives (MIN, MAX) of the criteria. +the alternative matrix, weights and objectives (MIN, MAX) of the criteria. 
""" @@ -21,7 +21,6 @@ # ============================================================================= -import enum import functools from collections import abc @@ -30,95 +29,15 @@ import pandas as pd from pandas.io.formats import format as pd_fmt -import pyquery as pq - - from .dominance import DecisionMatrixDominanceAccessor +from .objectives import Objective from .plot import DecisionMatrixPlotter from .stats import DecisionMatrixStatsAccessor -from ..utils import deprecated, doc_inherit +from ..utils import deprecated, df_temporal_header, doc_inherit # ============================================================================= -# CONSTANTS -# ============================================================================= -class Objective(enum.Enum): - """Representation of criteria objectives (Minimize, Maximize).""" - - #: Internal representation of minimize criteria - MIN = -1 - - #: Internal representation of maximize criteria - MAX = 1 - - # INTERNALS =============================================================== - - _MIN_STR = "\u25bc" - _MAX_STR = "\u25b2" - - #: Another way to name the maximization criteria. - _MAX_ALIASES = frozenset( - [ - MAX, - _MAX_STR, - max, - np.max, - np.nanmax, - np.amax, - "max", - "maximize", - "+", - ">", - ] - ) - - #: Another ways to name the minimization criteria. 
- _MIN_ALIASES = frozenset( - [ - MIN, - _MIN_STR, - min, - np.min, - np.nanmin, - np.amin, - "min", - "minimize", - "<", - "-", - ] - ) - - # CUSTOM CONSTRUCTOR ====================================================== - - @classmethod - def construct_from_alias(cls, alias): - """Return the alias internal representation of the objective.""" - if isinstance(alias, cls): - return alias - if isinstance(alias, str): - alias = alias.lower() - if alias in cls._MAX_ALIASES.value: - return cls.MAX - if alias in cls._MIN_ALIASES.value: - return cls.MIN - raise ValueError(f"Invalid criteria objective {alias}") - - # METHODS ================================================================= - - def __str__(self): - """Convert the objective to an string.""" - return self.name - - def to_string(self): - """Return the printable representation of the objective.""" - if self.value in Objective._MIN_ALIASES.value: - return Objective._MIN_STR.value - if self.value in Objective._MAX_ALIASES.value: - return Objective._MAX_STR.value - - -# ============================================================================= -# _SLICER ARRAY +# SLICERS ARRAY # ============================================================================= class _ACArray(np.ndarray, abc.Mapping): """Immutable Array to provide access to the alternative and criteria \ @@ -163,9 +82,50 @@ def values(self): return (self[e] for e in self) +class _Loc: + """Locator abstraction. + + this class ensures that the correct objectives and weights are applied to + the sliced ``DecisionMatrix``. 
+ + """ + + def __init__(self, name, real_loc, objectives, weights): + self._name = name + self._real_loc = real_loc + self._objectives = objectives + self._weights = weights + + @property + def name(self): + """The name of the locator.""" + return self._name + + def __getitem__(self, slc): + """dm[slc] <==> dm.__getitem__(slc).""" + df = self._real_loc.__getitem__(slc) + if isinstance(df, pd.Series): + df = df.to_frame().T + + dtypes = self._real_loc.obj.dtypes + dtypes = dtypes[dtypes.index.isin(df.columns)] + + df = df.astype(dtypes) + + objectives = self._objectives + objectives = objectives[objectives.index.isin(df.columns)].to_numpy() + + weights = self._weights + weights = weights[weights.index.isin(df.columns)].to_numpy() + + return DecisionMatrix(df, objectives, weights) + + # ============================================================================= # DECISION MATRIX # ============================================================================= + + class DecisionMatrix: """Representation of all data needed in the MCDA analysis. @@ -242,9 +202,9 @@ class DecisionMatrix: def __init__(self, data_df, objectives, weights): self._data_df = ( - data_df.copy() + data_df.copy(deep=True) if isinstance(data_df, pd.DataFrame) - else pd.DataFrame(data_df) + else pd.DataFrame(data_df, copy=True) ) self._objectives = np.asarray(objectives, dtype=object) @@ -378,15 +338,25 @@ def from_mcda_data( @property def alternatives(self): - """Names of the alternatives.""" - arr = self._data_df.index.to_numpy() + """Names of the alternatives. + + From this array you can also access the values of the alternatives as + ``pandas.Series``. + + """ + arr = self._data_df.index.to_numpy(copy=True) slicer = self._data_df.loc.__getitem__ return _ACArray(arr, slicer) @property def criteria(self): - """Names of the criteria.""" - arr = self._data_df.columns.to_numpy() + """Names of the criteria. + + From this array you can also access the values of the criteria as + ``pandas.Series``. 
+ + """ + arr = self._data_df.columns.to_numpy(copy=True) slicer = self._data_df.__getitem__ return _ACArray(arr, slicer) @@ -396,17 +366,19 @@ def weights(self): return pd.Series( self._weights, dtype=float, - index=self._data_df.columns, + index=self._data_df.columns.copy(deep=True), name="Weights", + copy=True, ) @property def objectives(self): """Objectives of the criteria as ``Objective`` instances.""" return pd.Series( - [Objective.construct_from_alias(a) for a in self._objectives], + [Objective.from_alias(a) for a in self._objectives], index=self._data_df.columns, name="Objectives", + copy=True, ) @property @@ -436,7 +408,8 @@ def iobjectives(self): return pd.Series( [o.value for o in self.objectives], dtype=np.int8, - index=self._data_df.columns, + index=self._data_df.columns.copy(deep=True), + copy=True, ) @property @@ -445,16 +418,23 @@ def matrix(self): The matrix excludes weights and objectives. - If you want to create a DataFrame with objetvies and weights, use + If you want to create a DataFrame with objectives and weights, use ``DecisionMatrix.to_dataframe()`` """ - return self._data_df.copy() + mtx = self._data_df.copy(deep=True) + mtx.index = self._data_df.index.copy(deep=True) + mtx.index.name = "Alternatives" + mtx.columns = self._data_df.columns.copy(deep=True) + mtx.columns.name = "Criteria" + return mtx @property def dtypes(self): """Dtypes of the criteria.""" - return self._data_df.dtypes.copy() + series = self._data_df.dtypes.copy(deep=True) + series.index = self._data_df.dtypes.index.copy(deep=True) + return series # ACCESSORS (YES, WE USE CACHED PROPERTIES IS THE EASIEST WAY) ============ @@ -553,9 +533,9 @@ def to_dict(self): @deprecated( reason=( - "Use 'DecisionMatrix.stats()', " - "'DecisionMatrix.stats(\"describe\")' or " - "'DecisionMatrix.stats.describe()' instead." + "Use ``DecisionMatrix.stats()``, " + "``DecisionMatrix.stats('describe)`` or " + "``DecisionMatrix.stats.describe()`` instead." 
), version=0.6, ) @@ -600,7 +580,7 @@ def __len__(self): def equals(self, other): """Return True if the decision matrix are equal. - This method calls `DecisionMatrix.aquals` whitout tolerance. + This method calls `DecisionMatrix.aquals` without tolerance. Parameters ---------- @@ -690,15 +670,80 @@ def aequals(self, other, rtol=1e-05, atol=1e-08, equal_nan=False): ) ) - # repr ==================================================================== - def _get_cow_headers(self): + # SLICES ================================================================== + + def __getitem__(self, slc): + """dm[slc] <==> dm.__getitem__(slc).""" + df = self._data_df.__getitem__(slc) + if isinstance(df, pd.Series): + df = df.to_frame() + + dtypes = self._data_df.dtypes + dtypes = dtypes[dtypes.index.isin(df.columns)] + + df = df.astype(dtypes) + + objectives = self.objectives + objectives = objectives[objectives.index.isin(df.columns)].to_numpy() + + weights = self.weights + weights = weights[weights.index.isin(df.columns)].to_numpy() + + return DecisionMatrix(df, objectives, weights) + + @property + def loc(self): + """Access a group of alternatives and criteria by label(s) or a \ + boolean array. + + ``.loc[]`` is primarily alternative label based, but may also be used + with a boolean array. + + Unlike DataFrames, `ìloc`` of ``DecisionMatrix`` always returns an + instance of ``DecisionMatrix``. + + """ + return _Loc("loc", self._data_df.loc, self.objectives, self.weights) + + @property + def iloc(self): + """Purely integer-location based indexing for selection by position. + + ``.iloc[]`` is primarily integer position based (from ``0`` to + ``length-1`` of the axis), but may also be used with a boolean + array. + + Unlike DataFrames, `ìloc`` of ``DecisionMatrix`` always returns an + instance of ``DecisionMatrix``. 
+ + """ + return _Loc("iloc", self._data_df.iloc, self.objectives, self.weights) + + # REPR ==================================================================== + + def _get_cow_headers( + self, only=None, fmt="{criteria}[{objective}{weight}]" + ): """Columns names with COW (Criteria, Objective, Weight).""" + criteria = self._data_df.columns + objectives = self.objectives + weights = self.weights + + if only: + mask = self._data_df.columns.isin(only) + criteria = criteria[mask] + objectives = objectives[mask] + weights = weights[mask] + + weights = pd_fmt.format_array(weights, None) + headers = [] - fmt_weights = pd_fmt.format_array(self.weights, None) - for c, o, w in zip(self.criteria, self.objectives, fmt_weights): - header = f"{c}[{o.to_string()}{w}]" + for crit, obj, weight in zip(criteria, objectives, weights): + header = fmt.format( + criteria=crit, objective=obj.to_symbol(), weight=weight + ) headers.append(header) - return headers + return np.array(headers) def _get_axc_dimensions(self): """Dimension foote with AxC (Alternativs x Criteria).""" @@ -711,26 +756,9 @@ def __repr__(self): header = self._get_cow_headers() dimensions = self._get_axc_dimensions() - max_rows = pd.get_option("display.max_rows") - min_rows = pd.get_option("display.min_rows") - max_cols = pd.get_option("display.max_columns") - max_colwidth = pd.get_option("display.max_colwidth") - - width = ( - pd.io.formats.console.get_console_size()[0] - if pd.get_option("display.expand_frame_repr") - else None - ) - - original_string = self._data_df.to_string( - max_rows=max_rows, - min_rows=min_rows, - max_cols=max_cols, - line_width=width, - max_colwidth=max_colwidth, - show_dimensions=False, - header=header, - ) + with df_temporal_header(self._data_df, header) as df: + with pd.option_context("display.show_dimensions", False): + original_string = repr(df) # add dimension string = f"{original_string}\n[{dimensions}]" @@ -742,12 +770,13 @@ def _repr_html_(self): Mainly for IPython notebook. 
""" - header = dict(zip(self.criteria, self._get_cow_headers())) + header = self._get_cow_headers() dimensions = self._get_axc_dimensions() # retrieve the original string - with pd.option_context("display.show_dimensions", False): - original_html = self._data_df._repr_html_() + with df_temporal_header(self._data_df, header) as df: + with pd.option_context("display.show_dimensions", False): + original_html = df._repr_html_() # add dimension html = ( @@ -757,13 +786,7 @@ def _repr_html_(self): "" ) - # now we need to change the table header - d = pq.PyQuery(html) - for th in d("div.decisionmatrix table.dataframe > thead > tr > th"): - crit = th.text - th.text = header.get(crit, crit) - - return str(d) + return html # ============================================================================= diff --git a/skcriteria/core/dominance.py b/skcriteria/core/dominance.py index 6c584da..28629c7 100644 --- a/skcriteria/core/dominance.py +++ b/skcriteria/core/dominance.py @@ -81,7 +81,7 @@ def _cache_read(self, a0, a1): # FRAME ALT VS ALT ======================================================== - def _create_frame(self, compute_cell): + def _create_frame(self, compute_cell, iname, cname): """Create a data frame comparing two alternatives. The value of each cell is calculated with the "compute_cell" @@ -95,7 +95,13 @@ def _create_frame(self, compute_cell): for a1 in alternatives: row[a1] = compute_cell(a0, a1) rows.append(row) - return pd.DataFrame(rows, index=alternatives) + + df = pd.DataFrame(rows, index=alternatives) + + df.index.name = iname + df.columns.name = cname + + return df def bt(self): """Compare on how many criteria one alternative is better than another. 
@@ -116,7 +122,9 @@ def compute_cell(a0, a1): centry, ckreverted = self._cache_read(a0, a1) return centry.aDb if not ckreverted else centry.bDa - return self._create_frame(compute_cell) + return self._create_frame( + compute_cell, iname="Better than", cname="Worse than" + ) def eq(self): """Compare on how many criteria two alternatives are equal. @@ -136,7 +144,9 @@ def compute_cell(a0, a1): centry, _ = self._cache_read(a0, a1) return centry.eq - return self._create_frame(compute_cell) + return self._create_frame( + compute_cell, iname="Equals to", cname="Equals to" + ) def dominance(self, *, strict=False): """Compare if one alternative dominates or strictly dominates another \ @@ -176,7 +186,15 @@ def compute_cell(a0, a1): return performance_a0 > 0 and performance_a1 == 0 - return self._create_frame(compute_cell) + iname, cname = ( + ("Strict dominators", "Strictly dominated") + if strict + else ("Dominators", "Dominated") + ) + + dom = self._create_frame(compute_cell, iname=iname, cname=cname) + + return dom # COMPARISONS ============================================================= @@ -238,7 +256,7 @@ def compare(self, a0, a1): return df - # The dominated============================================================ + # The dominated =========================================================== def dominated(self, *, strict=False): """Which alternative is dominated or strictly dominated by at least \ @@ -257,7 +275,10 @@ def dominated(self, *, strict=False): by at least one other alternative. 
""" - return self.dominance(strict=strict).any() + dom = self.dominance(strict=strict).any() + dom.name = dom.index.name + dom.index.name = "Alternatives" + return dom @functools.lru_cache(maxsize=None) def dominators_of(self, a, *, strict=False): diff --git a/skcriteria/core/methods.py b/skcriteria/core/methods.py index 7974544..639d5bf 100644 --- a/skcriteria/core/methods.py +++ b/skcriteria/core/methods.py @@ -19,8 +19,6 @@ import copy import inspect -from .data import DecisionMatrix -from ..utils import doc_inherit # ============================================================================= # BASE DECISION MAKER CLASS @@ -88,7 +86,7 @@ def __repr__(self): parameters.append(f"{pname}={repr(pvalue)}") str_parameters = ", ".join(parameters) - return f"{cls_name}({str_parameters})" + return f"<{cls_name} [{str_parameters}]>" def get_parameters(self): """Return the parameters of the method as dictionary.""" @@ -116,149 +114,8 @@ def copy(self, **kwargs): """ asdict = self.get_parameters() + asdict.update(kwargs) cls = type(self) return cls(**asdict) - - -# ============================================================================= -# SKCTransformer ABC -# ============================================================================= - - -class SKCTransformerABC(SKCMethodABC): - """Abstract class for all transformer in scikit-criteria.""" - - _skcriteria_dm_type = "transformer" - _skcriteria_abstract_class = True - - @abc.abstractmethod - def _transform_data(self, **kwargs): - """Apply the transformation logic to the decision matrix parameters. - - Parameters - ---------- - kwargs: - The decision matrix as separated parameters. - - Returns - ------- - :py:class:`dict` - A dictionary with all the values of the decision matrix - transformed. - - """ - raise NotImplementedError() - - def transform(self, dm): - """Perform transformation on `dm`. - - Parameters - ---------- - dm: :py:class:`skcriteria.data.DecisionMatrix` - The decision matrix to transform. 
- - Returns - ------- - :py:class:`skcriteria.data.DecisionMatrix` - Transformed decision matrix. - - """ - data = dm.to_dict() - - transformed_data = self._transform_data(**data) - - transformed_dm = DecisionMatrix.from_mcda_data(**transformed_data) - - return transformed_dm - - -class SKCMatrixAndWeightTransformerABC(SKCTransformerABC): - """Transform weights and matrix together or independently. - - The Transformer that implements this abstract class can be configured to - transform - `weights`, `matrix` or `both` so only that part of the DecisionMatrix - is altered. - - This abstract class require to redefine ``_transform_weights`` and - ``_transform_matrix``, instead of ``_transform_data``. - - """ - - _skcriteria_abstract_class = True - _skcriteria_parameters = ["target"] - - _TARGET_WEIGHTS = "weights" - _TARGET_MATRIX = "matrix" - _TARGET_BOTH = "both" - - def __init__(self, target): - if target not in ( - self._TARGET_MATRIX, - self._TARGET_WEIGHTS, - self._TARGET_BOTH, - ): - raise ValueError( - f"'target' can only be '{self._TARGET_WEIGHTS}', " - f"'{self._TARGET_MATRIX}' or '{self._TARGET_BOTH}', " - f"found '{target}'" - ) - self._target = target - - @property - def target(self): - """Determine which part of the DecisionMatrix will be transformed.""" - return self._target - - @abc.abstractmethod - def _transform_weights(self, weights): - """Execute the transform method over the weights. - - Parameters - ---------- - weights: :py:class:`numpy.ndarray` - The weights to transform. - - Returns - ------- - :py:class:`numpy.ndarray` - The transformed weights. - - """ - raise NotImplementedError() - - @abc.abstractmethod - def _transform_matrix(self, matrix): - """Execute the transform method over the matrix. - - Parameters - ---------- - matrix: :py:class:`numpy.ndarray` - The decision matrix to transform - - Returns - ------- - :py:class:`numpy.ndarray` - The transformed matrix. 
- - """ - raise NotImplementedError() - - @doc_inherit(SKCTransformerABC._transform_data) - def _transform_data(self, matrix, weights, **kwargs): - transformed_mtx = matrix - transformed_weights = weights - - if self._target in (self._TARGET_MATRIX, self._TARGET_BOTH): - transformed_mtx = self._transform_matrix(matrix) - - if self._target in (self._TARGET_WEIGHTS, self._TARGET_BOTH): - transformed_weights = self._transform_weights(weights) - - kwargs.update( - matrix=transformed_mtx, weights=transformed_weights, dtypes=None - ) - - return kwargs diff --git a/skcriteria/core/objectives.py b/skcriteria/core/objectives.py new file mode 100644 index 0000000..f07fe56 --- /dev/null +++ b/skcriteria/core/objectives.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. + +# ============================================================================= +# DOCS +# ============================================================================= + +"""Definition of the objectives (MIN, MAX) for the criteria.""" + + +# ============================================================================= +# IMPORTS +# ============================================================================= + +import enum + +import numpy as np + +from ..utils import deprecated + + +# ============================================================================= +# CONSTANTS +# ============================================================================= + + +class Objective(enum.Enum): + """Representation of criteria objectives (Minimize, Maximize).""" + + #: Internal representation of minimize criteria + MIN = -1 + + #: Internal representation of maximize criteria + MAX = 1 + + # INTERNALS =============================================================== + + _MIN_STR = "\u25bc" # ▼ + _MAX_STR = 
"\u25b2" # ▲ + + #: Another way to name the maximization criteria. + _MAX_ALIASES = frozenset( + [ + MAX, + _MAX_STR, + max, + np.max, + np.nanmax, + np.amax, + "max", + "maximize", + "+", + ">", + ] + ) + + #: Another ways to name the minimization criteria. + _MIN_ALIASES = frozenset( + [ + MIN, + _MIN_STR, + min, + np.min, + np.nanmin, + np.amin, + "min", + "minimize", + "-", + "<", + ] + ) + + # CUSTOM CONSTRUCTOR ====================================================== + + @classmethod + def from_alias(cls, alias): + """Return a n objective instase based on some given alias.""" + if isinstance(alias, cls): + return alias + if isinstance(alias, str): + alias = alias.lower() + if alias in cls._MAX_ALIASES.value: + return cls.MAX + if alias in cls._MIN_ALIASES.value: + return cls.MIN + raise ValueError(f"Invalid criteria objective {alias}") + + # METHODS ================================================================= + + def __str__(self): + """Convert the objective to an string.""" + return self.name + + def to_symbol(self): + """Return the printable symbol representation of the objective.""" + if self.value in Objective._MIN_ALIASES.value: + return Objective._MIN_STR.value + if self.value in Objective._MAX_ALIASES.value: + return Objective._MAX_STR.value + + # DEPRECATED ============================================================== + + @classmethod + @deprecated(reason="Use ``Objective.from_alias()`` instead.", version=0.8) + def construct_from_alias(cls, alias): + """Return an objective instance based on some given alias.""" + return cls.from_alias(alias) + + @deprecated(reason="Use ``MAX/MIN.to_symbol()`` instead.", version=0.8) + def to_string(self): + """Return the printable representation of the objective.""" + return self.to_symbol() diff --git a/skcriteria/core/plot.py b/skcriteria/core/plot.py index b207b6d..1d74ec4 100644 --- a/skcriteria/core/plot.py +++ b/skcriteria/core/plot.py @@ -15,18 +15,21 @@ # IMPORTS # 
============================================================================= -import matplotlib.pyplot as plt +import pandas as pd import seaborn as sns +from .objectives import Objective from ..utils import AccessorABC # ============================================================================= # PLOTTER OBJECT # ============================================================================= + + class DecisionMatrixPlotter(AccessorABC): - """Make plots of DecisionMatrix. + """DecisionMatrix plot utilities. Kind of plot to produce: @@ -45,6 +48,8 @@ class DecisionMatrixPlotter(AccessorABC): - 'ogive' : criteria empirical cumulative distribution plot. - 'wogive' : weights empirical cumulative distribution plot. - 'area' : criteria area plot. + - 'dominance': the dominance matrix as a heatmap. + - 'frontier': criteria pair-wise Pareto-Frontier. """ @@ -56,31 +61,28 @@ def __init__(self, dm): # PRIVATE ================================================================= # This method are used "a lot" inside all the different plots, so we can # save some lines of code + def _get_criteria_labels(self, **kwargs): + kwargs.setdefault("fmt", "{criteria} {objective}") + labels = self._dm._get_cow_headers(**kwargs) + return pd.Series(labels, name="Criteria") @property def _ddf(self): # proxy to access the dataframe with the data - return self._dm.matrix + ddf = self._dm.matrix + ddf.columns = self._get_criteria_labels() + return ddf @property def _wdf(self): # proxy to access the dataframe with the weights - return self._dm.weights.to_frame() - - @property - def _criteria_labels(self): - # list with all the criteria + objectives - dm = self._dm - labels = [ - f"{c} {o.to_string()}" for c, o in zip(dm.criteria, dm.objectives) - ] - return labels + wdf = self._dm.weights.to_frame() + wdf.index = self._get_criteria_labels() + return wdf # HEATMAP ================================================================= def _heatmap(self, df, **kwargs): - kwargs.setdefault("annot", True) 
- kwargs.setdefault("cmap", plt.cm.get_cmap()) ax = sns.heatmap(df, **kwargs) return ax @@ -89,7 +91,7 @@ def heatmap(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.heatmap``. @@ -98,10 +100,8 @@ def heatmap(self, **kwargs): matplotlib.axes.Axes or numpy.ndarray of them """ + kwargs.setdefault("annot", True) ax = self._heatmap(self._ddf, **kwargs) - ax.set_xticklabels(self._criteria_labels) - ax.set_ylabel("Alternatives") - ax.set_xlabel("Criteria") return ax def wheatmap(self, **kwargs): @@ -109,7 +109,7 @@ def wheatmap(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.heatmap``. @@ -118,13 +118,11 @@ def wheatmap(self, **kwargs): matplotlib.axes.Axes or numpy.ndarray of them """ + kwargs.setdefault("annot", True) ax = self._heatmap(self._wdf.T, **kwargs) - ax.set_xticklabels(self._criteria_labels) - ax.set_xlabel("Criteria") - if "ax" not in kwargs: # if the ax is provided by the user we assume that the figure - # is already setted to the expected size. If it's not we resize the + # is already with the expected size. If it's not, we resize the # height to 1/5 of the original size. fig = ax.get_figure() size = fig.get_size_inches() / [1, 5] @@ -145,7 +143,7 @@ def bar(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``DataFrame.plot.bar``. @@ -155,9 +153,6 @@ def bar(self, **kwargs): """ ax = self._ddf.plot.bar(**kwargs) - ax.set_xlabel("Alternatives") - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax def wbar(self, **kwargs): @@ -171,7 +166,7 @@ def wbar(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``DataFrame.plot.bar``. 
@@ -181,8 +176,6 @@ def wbar(self, **kwargs): """ ax = self._wdf.T.plot.bar(**kwargs) - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax # BARH ==================================================================== @@ -198,7 +191,7 @@ def barh(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``DataFrame.plot.barh``. @@ -208,9 +201,6 @@ def barh(self, **kwargs): """ ax = self._ddf.plot.barh(**kwargs) - ax.set_ylabel("Alternatives") - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax def wbarh(self, **kwargs): @@ -224,7 +214,7 @@ def wbarh(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``DataFrame.plot.barh``. @@ -234,8 +224,6 @@ def wbarh(self, **kwargs): """ ax = self._wdf.T.plot.barh(**kwargs) - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax # HIST ==================================================================== @@ -249,7 +237,7 @@ def hist(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.histplot``. @@ -259,8 +247,6 @@ def hist(self, **kwargs): """ ax = sns.histplot(self._ddf, **kwargs) - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax def whist(self, **kwargs): @@ -272,7 +258,7 @@ def whist(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.histplot``. 
@@ -282,8 +268,6 @@ def whist(self, **kwargs): """ ax = sns.histplot(self._wdf.T, **kwargs) - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax # BOX ===================================================================== @@ -299,7 +283,7 @@ def box(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.boxplot``. @@ -308,17 +292,7 @@ def box(self, **kwargs): matplotlib.axes.Axes or numpy.ndarray of them """ - orient = kwargs.setdefault("orient", "v") - ax = sns.boxplot(data=self._ddf, **kwargs) - - if orient == "v": - ax.set_xticklabels(self._criteria_labels) - ax.set_xlabel("Criteria") - elif orient == "h": - ax.set_yticklabels(self._criteria_labels) - ax.set_ylabel("Criteria") - return ax def wbox(self, **kwargs): @@ -332,7 +306,7 @@ def wbox(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.boxplot``. @@ -359,7 +333,7 @@ def kde(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.kdeplot``. @@ -369,8 +343,6 @@ def kde(self, **kwargs): """ ax = sns.kdeplot(data=self._ddf, **kwargs) - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax def wkde(self, **kwargs): @@ -386,7 +358,7 @@ def wkde(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.kdeplot``. @@ -416,7 +388,7 @@ def ogive(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.ecdfplot``. 
@@ -426,8 +398,6 @@ def ogive(self, **kwargs): """ ax = sns.ecdfplot(data=self._ddf, **kwargs) - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) return ax def wogive(self, **kwargs): @@ -446,7 +416,7 @@ def wogive(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in ``seaborn.ecdfplot``. @@ -468,7 +438,7 @@ def area(self, **kwargs): Parameters ---------- - **kwargs + **kwargs: Additional keyword arguments are passed and are documented in :meth:`DataFrame.plot.area`. @@ -479,7 +449,179 @@ def area(self, **kwargs): """ ax = self._ddf.plot.area(**kwargs) - ax.set_xlabel("Alternatives") - if kwargs.get("legend", True): - ax.legend(self._criteria_labels) + return ax + + # DOMINANCE =============================================================== + + def dominance(self, *, strict=False, **kwargs): + """Plot dominance as a color-encoded matrix. + + In order to evaluate the dominance of an alternative *a0* over an + alternative *a1*, the algorithm evaluates that *a0* is better in at + least one criterion and that *a1* is not better in any criterion than + *a0*. In the case that ``strict = True`` it also evaluates that there + are no equal criteria. + + Parameters + ---------- + strict: bool, default ``False`` + If True, strict dominance is evaluated. + **kwargs: + Additional keyword arguments are passed and are documented in + ``seaborn.heatmap``. 
+ + Returns + ------- + matplotlib.axes.Axes or numpy.ndarray of them + + """ + dm = self._dm + import numpy as np + + dom = dm.dominance.dominance(strict=strict) + bt = dm.dominance.bt().to_numpy().astype(str) + eq = dm.dominance.eq().to_numpy().astype(str) + + annot = kwargs.pop("annot", True) + if annot: + annot = "" + for elem in [r"$\succ", bt, "$/$=", eq, "$"]: + annot = np.char.add(annot, elem) + + kwargs.setdefault("cbar", False) + kwargs.setdefault("fmt", "") + ax = self._heatmap(dom, annot=annot, **kwargs) + + return ax + + def frontier( + self, + x, + y, + *, + strict=False, + ax=None, + legend=True, + scatter_kws=None, + line_kws=None, + ): + """Pareto frontier on two arbitrarily selected criteria. + + A selection of an alternative of an $A_o$ is a pareto-optimal solution + when there is no other solution that selects an alternative that does + not belong to $A_o$ such that it improves on one objective without + worsening at least one of the others. + + From this point of view, the concept is used to analyze the possible + optimal options of a solution given a variety of objectives or desires + and one or more evaluation criteria. + + Given a "universe" of alternatives, one seeks to determine the set that + are Pareto efficient (i.e., those alternatives that satisfy the + condition of not being able to better satisfy one of those desires or + objectives without worsening some other). That set of optimal + alternatives establishes a "Pareto set" or the "Pareto Frontier". + + The study of the solutions in the frontier allows designers to analyze + the possible alternatives within the established parameters, without + having to analyze the totality of possible solutions. + + Parameters + ---------- + x, y : str + Criteria names. + Variables that specify positions on the x and y axes. + weighted: bool, default ``False`` + If its True the domination analysis is performed over the weighted + matrix. 
+        strict: bool, default ``False``
+            If True, strict dominance is evaluated.
+        weighted: bool, default ``False``
+            If True, the weighted matrix is evaluated.
+        ax : :class:`matplotlib.axes.Axes`
+            Pre-existing axes for the plot. Otherwise, call
+            ``matplotlib.pyplot.gca`` internally.
+        legend : bool, default ``True``
+            If ``False``, no legend data is added and no legend is drawn.
+        scatter_kws: dict, default ``None``
+            Additional parameters passed to ``seaborn.scatterplot``.
+        line_kws: dict, default ``None``
+            Additional parameters passed to ``seaborn.lineplot``,
+            except for ``estimator`` and ``sort``.
+
+        Returns
+        -------
+        matplotlib.axes.Axes or numpy.ndarray of them
+
+        References
+        ----------
+        :cite:p:`enwiki:1107297090`
+        :cite:p:`enwiki:1110412520`
+
+        """
+        # cut the dmatrix to only the necessary criteria
+        sdm = self._dm[[x, y]]
+
+        # extract the matrix
+        df = sdm.matrix
+
+        # draw the scatterplot ================================================
+        scatter_kws = {} if scatter_kws is None else scatter_kws
+        scatter_kws.setdefault("ax", ax)
+        scatter_kws.setdefault("legend", legend)
+        ax = sns.scatterplot(x=x, y=y, data=df, hue=df.index, **scatter_kws)
+
+        # draw the frontier ===================================================
+        # Get the non dominated alternatives.
+        # These alternatives create the frontier
+        non_dominated = df[
+            ~sdm.dominance.dominated(strict=strict)
+        ].sort_values([x, y])
+
+        # if we only have one alternative in the frontier but we have more
+        # alternatives we draw a limit around all the dominated one.
+ if len(non_dominated) == 1 and len(sdm.alternatives) > 1: + non_dominated = pd.concat([non_dominated] * 3, ignore_index=True) + + # esto cambia si x o y son a minimizar + obj_x, obj_y = sdm.objectives + + non_dominated.iloc[0, 0] = ( + df[x].min() if obj_x is Objective.MAX else df[x].max() + ) + non_dominated.iloc[2, 1] = ( + df[y].min() if obj_y is Objective.MAX else df[y].max() + ) + + # line style and frontier label + frontier_ls, frontier_lb = ( + ("-", "Strict frontier") if strict else ("--", "Frontier") + ) + + # draw the line plot + line_kws = {} if line_kws is None else line_kws + line_kws.setdefault("alpha", 0.5) + line_kws.setdefault("linestyle", frontier_ls) + line_kws.setdefault("label", frontier_lb) + line_kws.setdefault("legend", legend) + + sns.lineplot( + x=x, + y=y, + data=non_dominated, + estimator=None, + sort=False, + ax=ax, + **line_kws, + ) + + # Set the labels + xlabel, ylabel = self._get_criteria_labels(only=[x, y]) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + if legend: + handles, labels = ax.get_legend_handles_labels() + ax.legend(handles, labels, title="Alternatives") + return ax diff --git a/skcriteria/core/stats.py b/skcriteria/core/stats.py index 44ccf11..09defb9 100644 --- a/skcriteria/core/stats.py +++ b/skcriteria/core/stats.py @@ -29,24 +29,24 @@ class DecisionMatrixStatsAccessor(AccessorABC): Kind of statistic to produce: - 'corr' : Compute pairwise correlation of columns, excluding - NA/null values. + NA/null values. - 'cov' : Compute pairwise covariance of columns, excluding NA/null - values. + values. - 'describe' : Generate descriptive statistics. - 'kurtosis' : Return unbiased kurtosis over requested axis. - 'mad' : Return the mean absolute deviation of the values over the - requested axis. + requested axis. - 'max' : Return the maximum of the values over the requested axis. - 'mean' : Return the mean of the values over the requested axis. - 'median' : Return the median of the values over the requested - axis. 
+ axis. - 'min' : Return the minimum of the values over the requested axis. - 'pct_change' : Percentage change between the current and a prior - element. + element. - 'quantile' : Return values at the given quantile over requested - axis. + axis. - 'sem' : Return unbiased standard error of the mean over requested - axis. + axis. - 'skew' : Return unbiased skew over requested axis. - 'std' : Return sample standard deviation over requested axis. - 'var' : Return unbiased variance over requested axis. @@ -59,7 +59,6 @@ class DecisionMatrixStatsAccessor(AccessorABC): "cov", "describe", "kurtosis", - "mad", "max", "mean", "median", @@ -88,3 +87,17 @@ def __dir__(self): return super().__dir__() + [ e for e in dir(self._dm._data_df) if e in self._DF_WHITELIST ] + + def mad(self, axis=0, skipna=True): + """Return the mean absolute deviation of the values over a given axis. + + Parameters + ---------- + axis : int + Axis for the function to be applied on. + skipna : bool, default True + Exclude NA/null values when computing the result. + + """ + df = self._dm._data_df + return (df - df.mean(axis=axis)).abs().mean(axis=axis, skipna=skipna) diff --git a/skcriteria/datasets/__init__.py b/skcriteria/datasets/__init__.py new file mode 100644 index 0000000..0efbe86 --- /dev/null +++ b/skcriteria/datasets/__init__.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. 
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""The :mod:`skcriteria.datasets` module includes utilities to load \
+datasets."""
+
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+import json
+import os
+import pathlib
+
+from skcriteria.core.data import mkdm
+
+from .. import core
+
+# =============================================================================
+# CONSTANTS
+# =============================================================================
+
+_PATH = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+
+def load_simple_stock_selection():
+    """Simple stock selection decision matrix.
+
+    This matrix was designed primarily for teaching and evaluating the behavior
+    of an experiment.
+
+    Among the data we can find: two maximization criteria (ROE, CAP),
+    one minimization criterion (RI), dominated alternatives (FX), and
+    one alternative with an outlier criterion (ROE, MM = 1).
+
+    Although the criteria and alternatives are original from the authors of
+    Scikit-Criteria, the numerical values were extracted at some point from a
+    source which we have since forgotten.
+
+    Description:
+
+    In order to decide to buy a series of stocks, a company studied 6 candidate
+    investments: PE, JN, AA, FX, MM and GN. The finance department decides to
+    consider the following criteria for selection:
+
+    1. ROE (Max): Return % for each monetary unit invested.
+    2. CAP (Max): Years of market capitalization.
+    3. RI (Min): Risk of the stock.
+
+    """
+    dm = core.mkdm(
+        matrix=[
+            [7, 5, 35],
+            [5, 4, 26],
+            [5, 6, 28],
+            [3, 4, 36],
+            [1, 7, 30],
+            [5, 8, 30],
+        ],
+        objectives=[max, max, min],
+        weights=[2, 4, 1],
+        alternatives=["PE", "JN", "AA", "FX", "MM", "GN"],
+        criteria=["ROE", "CAP", "RI"],
+    )
+    return dm
+
+
+def load_van2021evaluation(windows_size=7):
+    r"""Dataset extracted from historical time series of cryptocurrencies.
+
+    This dataset is extracted from::
+
+        Van Heerden, N., Cabral, J. y Luczywo, N. (2021). Evaluación de la
+        importancia de criterios para la selección de criptomonedas.
+        XXXIV ENDIO - XXXII EPIO Virtual 2021, Argentina.
+
+    The nine available alternatives are based on the ranking of the 20
+    cryptocurrencies with the largest market capitalization calculated on the
+    basis of circulating supply, according to information retrieved from
+    "Cryptocurrency Historical Prices", retrieved on July 21st, 2021; from
+    there only the coins with complete data between October 9th, 2018 and July
+    6th of 2021, excluding stable-coins, since they maintain a stable price and
+    therefore do not carry associated yields; the alternatives that met these
+    requirements turned out to be: Cardano (ADA), Binance coin (BNB),
+    Bitcoin (BTC), Dogecoin (DOGE), Ethereum (ETH), Chainlink (LINK),
+    Litecoin (LTC), Stellar (XLM) and Ripple (XRP).
+
+    Two decision matrices were created for two sizes of overlapping moving
+    windows: 7 and 15 days. Six criteria were defined on these windows that
+    seek to represent returns and risks:
+
+    - ``xRV`` - average window return (:math:`\bar{x}RV`) - Maximize: is the
+      average of the differences between the closing price of the
+      cryptocurrency on the last day and the first day of each window, divided
+      by the price on the first day.
+    - ``sRV`` - window return deviation (:math:`sRV`) - Minimize: is the
+      standard deviation of window return. The greater the deviation, the
+      returns within the windows have higher variance and are unstable.
+ - ``xVV`` - average of the volume of the window (:math:`\bar{x}VV`) - + Maximize: it is the average of the summations of the transaction amount + of the cryptocurrency in dollars in each window, representing a liquidity + measure of the asset. + - ``sVV`` - window volume deviation (:math:`sVV`) - Minimize: it is the + deviation of the window volumes. The greater the deviation, the volumes + within the windows have higher variance and are unstable. + - ``xR2`` - mean of the correlation coefficient (:math:`\bar{x}R^2`) - + Maximize: it is the mean of the :math:`R^2` of the fit of the linear + trends with respect to the data. It is a measure that defines how well it + explains that linear trend to the data within the window. + - ``xm`` - mean of the slope (:math:`\bar{x}m`) - Maximize: it is the mean + of the slope of the linear trend between the closing prices in dollars + and the volumes traded in dollars of the cryptocurrency within each + window. + + Parameters + ---------- + windows_size: 7 o 15, default 7 + If the decision matrix based on 7 or 15 day overlapping moving windows + is desired. + + + References + ---------- + :cite:p:`van2021evaluation` + :cite:p:`van2021epio_evaluation` + :cite:p:`rajkumar_2021` + + """ + paths = { + 7: _PATH / "van2021evaluation" / "windows_size_7.json", + 15: _PATH / "van2021evaluation" / "windows_size_15.json", + } + + path = paths.get(windows_size) + if path is None: + raise ValueError( + f"Windows size must be '7' or '15'. 
Found {windows_size!r}" + ) + + with open(path) as fp: + data = json.load(fp) + + return mkdm(**data) diff --git a/skcriteria/datasets/van2021evaluation/windows_size_15.json b/skcriteria/datasets/van2021evaluation/windows_size_15.json new file mode 100644 index 0000000..cbbe34a --- /dev/null +++ b/skcriteria/datasets/van2021evaluation/windows_size_15.json @@ -0,0 +1,109 @@ +{ + "matrix": [ + [ + 0.072, + 0.274, + 17440000000.0, + 32880000000.0, + 0.281, + 3.806e-11 + ], + [ + 0.087, + 0.348, + 13160000000.0, + 23330000000.0, + 0.339, + 1.195e-08 + ], + [ + 0.036, + 0.159, + 450200000000.0, + 289400000000.0, + 0.237, + 2.192e-08 + ], + [ + 0.153, + 0.805, + 17770000000.0, + 52850000000.0, + 0.314, + 2.441e-12 + ], + [ + 0.055, + 0.213, + 214500000000.0, + 169500000000.0, + 0.239, + 2.52e-09 + ], + [ + 0.097, + 0.302, + 14440000000.0, + 27920000000.0, + 0.277, + 2.544e-09 + ], + [ + 0.034, + 0.207, + 54150000000.0, + 35570000000.0, + 0.28, + 2.679e-09 + ], + [ + 0.031, + 0.275, + 8951000000.0, + 11040000000.0, + 0.276, + 2.454e-11 + ], + [ + 0.037, + 0.292, + 49660000000.0, + 59500000000.0, + 0.26, + 9.236e-12 + ] + ], + "objectives": [ + 1, -1, + 1, -1, + 1, + 1 + ], + "weights": [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + "alternatives": [ + "ADA", + "BNB", + "BTC", + "DOGE", + "ETH", + "LINK", + "LTC", + "XLM", + "XRP" + ], + "criteria": [ + "xRV", + "sRV", + "xVV", + "sVV", + "xR2", + "xm" + ] +} \ No newline at end of file diff --git a/skcriteria/datasets/van2021evaluation/windows_size_7.json b/skcriteria/datasets/van2021evaluation/windows_size_7.json new file mode 100644 index 0000000..06f9950 --- /dev/null +++ b/skcriteria/datasets/van2021evaluation/windows_size_7.json @@ -0,0 +1,111 @@ +{ + "objectives": [ + "max", + "min", + "max", + "min", + "max", + "max" + ], + "weights": [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + "alternatives": [ + "ADA", + "BNB", + "BTC", + "DOGE", + "ETH", + "LINK", + "LTC", + "XLM", + "XRP" + ], + "criteria": [ + "xRV", 
+ "sRV", + "xVV", + "sVV", + "xR2", + "xm" + ], + "matrix": [ + [ + 0.029, + 0.156, + 8144000000.0, + 15860000000.0, + 0.312, + 1.821e-11 + ], + [ + 0.033, + 0.167, + 6141000000.0, + 11180000000.0, + 0.396, + 9.167e-09 + ], + [ + 0.015, + 0.097, + 209500000000.0, + 138800000000.0, + 0.281, + 1.254e-08 + ], + [ + 0.057, + 0.399, + 8287000000.0, + 27260000000.0, + 0.327, + 1.459e-12 + ], + [ + 0.023, + 0.127, + 100000000000.0, + 80540000000.0, + 0.313, + 1.737e-09 + ], + [ + 0.04, + 0.179, + 6707000000.0, + 16650000000.0, + 0.319, + 1.582e-09 + ], + [ + 0.015, + 0.134, + 25130000000.0, + 17310000000.0, + 0.32, + 1.816e-09 + ], + [ + 0.013, + 0.176, + 4157000000.0, + 5469000000.0, + 0.321, + 1.876e-11 + ], + [ + 0.014, + 0.164, + 23080000000.0, + 29240000000.0, + 0.322, + 7.996e-12 + ] + ] +} \ No newline at end of file diff --git a/skcriteria/madm/__init__.py b/skcriteria/madm/__init__.py index 13b30fe..d8a26a2 100644 --- a/skcriteria/madm/__init__.py +++ b/skcriteria/madm/__init__.py @@ -11,10 +11,25 @@ """MCDA methods.""" -from ._base import KernelResult, RankResult, ResultABC, SKCDecisionMakerABC + +# ============================================================================= +# IMPORTS +# ============================================================================= + +from ._madm_base import ( + KernelResult, + RankResult, + ResultABC, + SKCDecisionMakerABC, +) # ============================================================================= # ALL # ============================================================================= -__all__ = ["KernelResult", "RankResult", "ResultABC", "SKCDecisionMakerABC"] +__all__ = [ + "KernelResult", + "RankResult", + "ResultABC", + "SKCDecisionMakerABC", +] diff --git a/skcriteria/madm/_base.py b/skcriteria/madm/_madm_base.py similarity index 61% rename from skcriteria/madm/_base.py rename to skcriteria/madm/_madm_base.py index 8419e48..3484c5d 100644 --- a/skcriteria/madm/_base.py +++ b/skcriteria/madm/_madm_base.py @@ -97,22 
+97,23 @@ class ResultABC(metaclass=abc.ABCMeta): """ - _skcriteria_result_column = None + _skcriteria_result_series = None def __init_subclass__(cls): """Validate if the subclass are well formed.""" - result_column = cls._skcriteria_result_column + result_column = cls._skcriteria_result_series if result_column is None: - raise TypeError(f"{cls} must redefine '_skcriteria_result_column'") + raise TypeError(f"{cls} must redefine '_skcriteria_result_series'") def __init__(self, method, alternatives, values, extra): self._validate_result(values) self._method = str(method) self._extra = Bunch("extra", extra) - self._result_df = pd.DataFrame( + self._result_series = pd.Series( values, - index=alternatives, - columns=[self._skcriteria_result_column], + index=pd.Index(alternatives, name="Alternatives", copy=True), + name=self._skcriteria_result_series, + copy=True, ) @abc.abstractmethod @@ -127,7 +128,7 @@ def values(self): The i-th value refers to the valuation of the i-th. alternative. """ - return self._result_df[self._skcriteria_result_column].to_numpy() + return self._result_series.to_numpy(copy=True) @property def method(self): @@ -137,7 +138,7 @@ def method(self): @property def alternatives(self): """Names of the alternatives evaluated.""" - return self._result_df.index.to_numpy() + return self._result_series.index.to_numpy(copy=True) @property def extra_(self): @@ -152,16 +153,24 @@ def extra_(self): e_ = extra_ + # UTILS =================================================================== + + def to_series(self): + """The result as `pandas.Series`.""" + series = self._result_series.copy(deep=True) + series.index = self._result_series.index.copy(deep=True) + return series + # CMP ===================================================================== @property def shape(self): - """Tuple with (number_of_alternatives, number_of_alternatives). + """Tuple with (number_of_alternatives, ). 
rank.shape <==> np.shape(rank) """ - return np.shape(self._result_df) + return np.shape(self._result_series) def __len__(self): """Return the number ot alternatives. @@ -169,18 +178,119 @@ def __len__(self): rank.__len__() <==> len(rank). """ - return len(self._result_df) + return len(self._result_series) - def equals(self, other): + def values_equals(self, other): """Check if the alternatives and ranking are the same. The method doesn't check the method or the extra parameters. """ return (self is other) or ( - isinstance(other, RankResult) - and self._result_df.equals(other._result_df) + isinstance(other, type(self)) + and self._result_series.equals(other._result_series) + ) + + def aequals(self, other, rtol=1e-05, atol=1e-08, equal_nan=False): + """Return True if the result are equal within a tolerance. + + The tolerance values are positive, typically very small numbers. The + relative difference (`rtol` * abs(`b`)) and the absolute difference + `atol` are added together to compare against the absolute difference + between `a` and `b`. + + NaNs are treated as equal if they are in the same place and if + ``equal_nan=True``. Infs are treated as equal if they are in the same + place and of the same sign in both arrays. + + The proceeds as follows: + + - If ``other`` is the same object return ``True``. + - If ``other`` is not instance of 'DecisionMatrix', has different shape + 'criteria', 'alternatives' or 'objectives' returns ``False``. + - Next check the 'weights' and the matrix itself using the provided + tolerance. + + Parameters + ---------- + other : Result + Other result to compare. + rtol : float + The relative tolerance parameter + (see Notes in :py:func:`numpy.allclose`). + atol : float + The absolute tolerance parameter + (see Notes in :py:func:`numpy.allclose`). + equal_nan : bool + Whether to compare NaN's as equal. If True, NaN's in dm will be + considered equal to NaN's in `other` in the output array. 
+ + Returns + ------- + aequals : :py:class:`bool:py:class:` + Returns True if the two result are equal within the given + tolerance; False otherwise. + + See Also + -------- + equals, :py:func:`numpy.isclose`, :py:func:`numpy.all`, + :py:func:`numpy.any`, :py:func:`numpy.equal`, + :py:func:`numpy.allclose`. + + """ + if self is other: + return True + is_veq = self.values_equals(other) and set(self._extra) == set( + other._extra ) + keys = set(self._extra) + while is_veq and keys: + k = keys.pop() + sv = self._extra[k] + ov = other._extra[k] + if isinstance(ov, np.ndarray): + is_veq = is_veq and np.allclose( + sv, + ov, + rtol=rtol, + atol=atol, + equal_nan=equal_nan, + ) + else: + is_veq = is_veq and sv == ov + return is_veq + + def equals(self, other): + """Return True if the results are equal. + + This method calls `aquals` without tolerance. + + Parameters + ---------- + other : :py:class:`skcriteria.DecisionMatrix` + Other instance to compare. + + Returns + ------- + equals : :py:class:`bool:py:class:` + Returns True if the two results are equals. + + See Also + -------- + aequals, :py:func:`numpy.isclose`, :py:func:`numpy.all`, + :py:func:`numpy.any`, :py:func:`numpy.equal`, + :py:func:`numpy.allclose`. + + """ + return self.aequals(other, 0, 0, False) + + def __eq__(self, other): + """x.__eq__(y) <==> x == y.""" + return self.equals(other) + + def __ne__(self, other): + """x.__eq__(y) <==> x == y.""" + return not self == other # REPR ==================================================================== @@ -189,7 +299,7 @@ def __repr__(self): kwargs = {"show_dimensions": False} # retrieve the original string - df = self._result_df.T + df = self._result_series.to_frame().T original_string = df.to_string(**kwargs) # add dimension @@ -197,6 +307,26 @@ def __repr__(self): return string + def _repr_html_(self): + """Return a html representation for a particular result. + + Mainly for IPython notebook. 
+ + """ + df = self._result_series.to_frame().T + original_html = df.style._repr_html_() + rtype = self._skcriteria_result_series.lower() + + # add metadata + html = ( + f"
\n" + f"{original_html}" + f"Method: {self.method}\n" + "
" + ) + + return html + @doc_inherit(ResultABC, warn_class=False) class RankResult(ResultABC): @@ -207,7 +337,7 @@ class RankResult(ResultABC): """ - _skcriteria_result_column = "Rank" + _skcriteria_result_series = "Rank" @doc_inherit(ResultABC._validate_result) def _validate_result(self, values): @@ -248,24 +378,16 @@ def untied_rank_(self): return np.argsort(self.rank_) + 1 return self.rank_ - def _repr_html_(self): - """Return a html representation for a particular result. - - Mainly for IPython notebook. - - """ - df = self._result_df.T - original_html = df.style._repr_html_() - - # add metadata - html = ( - "
\n" - f"{original_html}" - f"Method: {self.method}\n" - "
" - ) - - return html + def to_series(self, *, untied=False): + """The result as `pandas.Series`.""" + if untied: + return pd.Series( + self.untied_rank_, + index=self._result_series.index.copy(deep=True), + copy=True, + name="Untied rank", + ) + return super().to_series() @doc_inherit(ResultABC, warn_class=False) @@ -277,7 +399,7 @@ class KernelResult(ResultABC): """ - _skcriteria_result_column = "Kernel" + _skcriteria_result_series = "Kernel" @doc_inherit(ResultABC._validate_result) def _validate_result(self, values): @@ -301,7 +423,7 @@ def kernel_where_(self): @property @deprecated( - reason=("Use 'kernel_where_' instead"), + reason=("Use ``kernel_where_`` instead"), version=0.7, ) def kernelwhere_(self): @@ -311,23 +433,6 @@ def kernelwhere_(self): @property def kernel_alternatives_(self): """Return the names of alternatives in the kernel.""" - return self._result_df.index[self._result_df.Kernel].to_numpy() - - def _repr_html_(self): - """Return a html representation for a particular result. - - Mainly for IPython notebook. - - """ - df = self._result_df.T - original_html = df._repr_html_() - - # add metadata - html = ( - "
\n" - f"{original_html}" - f"Method: {self.method}\n" - "
" + return self._result_series.index[self._result_series].to_numpy( + copy=True ) - - return html diff --git a/skcriteria/madm/electre.py b/skcriteria/madm/electre.py index a9cb532..c2c52db 100644 --- a/skcriteria/madm/electre.py +++ b/skcriteria/madm/electre.py @@ -33,9 +33,9 @@ from scipy import stats -from ._base import KernelResult, RankResult, SKCDecisionMakerABC +from ._madm_base import KernelResult, RankResult, SKCDecisionMakerABC from ..core import Objective -from ..utils import doc_inherit +from ..utils import doc_inherit, will_change # ============================================================================= @@ -214,17 +214,18 @@ def weights_outrank(matrix, weights, objectives): for a0_idx, a1_idx in alt_combs: - # sacamos las alternativas + # select the two alternatives to compare a0, a1 = matrix[[a0_idx, a1_idx]] - # vemos donde hay maximos y donde hay minimos estrictos + # we see where there are strict maximums and minimums maxs, mins = (a0 > a1), (a0 < a1) - # armamos los vectores de a \succ b teniendo en cuenta los objetivs + # we assemble the vectors of a \succ b taking the + # objectives into account a0_s_a1 = np.where(objectives == Objective.MAX.value, maxs, mins) a1_s_a0 = np.where(objectives == Objective.MAX.value, mins, maxs) - # sacamos ahora los criterios + # we now draw out the criteria outrank[a0_idx, a1_idx] = np.sum(weights * a0_s_a1) >= np.sum( weights * a1_s_a0 ) @@ -297,6 +298,10 @@ def _electre2_ranker( return ranking +@will_change( + reason="electre2 implementation will change in version after 0.8", + version=0.8, +) def electre2( matrix, objectives, weights, p0=0.65, p1=0.5, p2=0.35, q0=0.65, q1=0.35 ): @@ -319,7 +324,7 @@ def electre2( # TODO: remove loops - # calculo del ranking directo + # calculation of direct and indirect ranking ranking_direct = _electre2_ranker( alt_n, outrank_s, outrank_w, invert_ranking=False @@ -345,8 +350,12 @@ def electre2( ) +@will_change( + reason="ELECTRE2 implementation will change in version 
after 0.8", + version=0.8, +) class ELECTRE2(SKCDecisionMakerABC): - """Find the rankin solution through ELECTRE-2. + """Find the ranking solution through ELECTRE-2. ELECTRE II was proposed by Roy and Bertier (1971-1973) to overcome ELECTRE I's inability to produce a ranking of alternatives. Instead of simply @@ -389,12 +398,12 @@ def __init__(self, *, p0=0.65, p1=0.5, p2=0.35, q0=0.65, q1=0.35): if not (1 >= p0 >= p1 >= p2 >= 0): raise ValueError( "Condition '1 >= p0 >= p1 >= p2 >= 0' must be fulfilled. " - "Found: p0={p0}, p1={p1} p2={p2}.'" + f"Found: p0={p0}, p1={p1} p2={p2}.'" ) if not (1 >= q0 >= q1 >= 0): raise ValueError( "Condition '1 >= q0 >= q1 >= 0' must be fulfilled. " - "Found: q0={q0}, q1={q1}.'" + f"Found: q0={q0}, q1={q1}.'" ) self._p0, self._p1, self._p2, self._q0, self._q1 = (p0, p1, p2, q0, q1) diff --git a/skcriteria/madm/moora.py b/skcriteria/madm/moora.py index 4280ab2..cefe12b 100644 --- a/skcriteria/madm/moora.py +++ b/skcriteria/madm/moora.py @@ -20,7 +20,7 @@ import numpy as np -from ._base import RankResult, SKCDecisionMakerABC +from ._madm_base import RankResult, SKCDecisionMakerABC from ..core import Objective from ..utils import doc_inherit, rank diff --git a/skcriteria/madm/similarity.py b/skcriteria/madm/similarity.py index b3e60b1..82f867f 100644 --- a/skcriteria/madm/similarity.py +++ b/skcriteria/madm/similarity.py @@ -21,42 +21,11 @@ from scipy.spatial import distance -from ._base import RankResult, SKCDecisionMakerABC +from ._madm_base import RankResult, SKCDecisionMakerABC from ..core import Objective from ..utils import doc_inherit, rank -# ============================================================================= -# CONSTANTS -# ============================================================================= - -_VALID_DISTANCES_METRICS = [ - "braycurtis", - "canberra", - "chebyshev", - "cityblock", - "correlation", - "cosine", - "dice", - "euclidean", - "hamming", - "jaccard", - "jensenshannon", - "kulsinski", - 
"mahalanobis", - "matching", - "minkowski", - "rogerstanimoto", - "russellrao", - "seuclidean", - "sokalmichener", - "sokalsneath", - "sqeuclidean", - "wminkowski", - "yule", -] - - # ============================================================================= # TOPSIS # ============================================================================= @@ -72,8 +41,10 @@ def topsis(matrix, objectives, weights, metric="euclidean", **kwargs): maxs = np.max(wmtx, axis=0) # create the ideal and the anti ideal arrays - ideal = np.where(objectives == Objective.MAX.value, maxs, mins) - anti_ideal = np.where(objectives == Objective.MIN.value, maxs, mins) + where_max = np.equal(objectives, Objective.MAX.value) + + ideal = np.where(where_max, maxs, mins) + anti_ideal = np.where(where_max, mins, maxs) # calculate distances d_better = distance.cdist( @@ -136,8 +107,8 @@ class TOPSIS(SKCDecisionMakerABC): def __init__(self, *, metric="euclidean"): - if not callable(metric) and metric not in _VALID_DISTANCES_METRICS: - metrics = ", ".join(f"'{m}'" for m in _VALID_DISTANCES_METRICS) + if not callable(metric) and metric not in distance._METRICS_NAMES: + metrics = ", ".join(f"'{m}'" for m in distance._METRICS_NAMES) raise ValueError( f"Invalid metric '{metric}'. 
Plese choose from: {metrics}" ) diff --git a/skcriteria/madm/simple.py b/skcriteria/madm/simple.py index 1f1a90a..c1508f8 100644 --- a/skcriteria/madm/simple.py +++ b/skcriteria/madm/simple.py @@ -18,7 +18,7 @@ import numpy as np -from ._base import RankResult, SKCDecisionMakerABC +from ._madm_base import RankResult, SKCDecisionMakerABC from ..core import Objective from ..utils import doc_inherit, rank @@ -85,6 +85,8 @@ def _evaluate_data(self, matrix, weights, objectives, **kwargs): raise ValueError( "WeightedSumModel can't operate with minimize objective" ) + if np.any(matrix < 0): + raise ValueError("WeightedSumModel can't operate with values < 0") rank, score = wsm(matrix, weights) return rank, {"score": score} diff --git a/skcriteria/madm/simus.py b/skcriteria/madm/simus.py index fb3115b..e1e9028 100644 --- a/skcriteria/madm/simus.py +++ b/skcriteria/madm/simus.py @@ -20,7 +20,7 @@ import numpy as np -from ._base import RankResult, SKCDecisionMakerABC +from ._madm_base import RankResult, SKCDecisionMakerABC from ..core import Objective from ..preprocessing.scalers import scale_by_sum from ..utils import doc_inherit, lp, rank diff --git a/skcriteria/pipeline.py b/skcriteria/pipeline.py index 9eb7a81..7e6ebf9 100644 --- a/skcriteria/pipeline.py +++ b/skcriteria/pipeline.py @@ -15,10 +15,8 @@ # IMPORTS # ============================================================================= -from collections import Counter - from .core import SKCMethodABC -from .utils import Bunch +from .utils import Bunch, unique_names # ============================================================================= @@ -34,10 +32,7 @@ class SKCPipeline(SKCMethodABC): The final decision-maker only needs to implement `evaluate`. The purpose of the pipeline is to assemble several steps that can be - applied together while setting different parameters. 
A step's - estimator may be replaced entirely by setting the parameter with its name - to another dmaker or a transformer removed by setting it to - `'passthrough'` or `None`. + applied together while setting different parameters. Parameters ---------- @@ -61,34 +56,7 @@ def __init__(self, steps): self._validate_steps(steps) self._steps = steps - @property - def steps(self): - """List of steps of the pipeline.""" - return list(self._steps) - - def __len__(self): - """Return the length of the Pipeline.""" - return len(self.steps) - - def __getitem__(self, ind): - """Return a sub-pipeline or a single step in the pipeline. - - Indexing with an integer will return an step; using a slice - returns another Pipeline instance which copies a slice of this - Pipeline. This copy is shallow: modifying steps in the sub-pipeline - will affect the larger pipeline and vice-versa. - However, replacing a value in `step` will not affect a copy. - - """ - if isinstance(ind, slice): - if ind.step not in (1, None): - raise ValueError("Pipeline slicing only supports a step of 1") - return self.__class__(self.steps[ind]) - elif isinstance(ind, int): - return self.steps[ind][-1] - elif isinstance(ind, str): - return self.named_steps[ind] - raise KeyError(ind) + # INTERNALS =============================================================== def _validate_steps(self, steps): for name, step in steps[:-1]: @@ -107,6 +75,13 @@ def _validate_steps(self, steps): f"step '{name}' must implement 'evaluate()' method" ) + # PROPERTIES ============================================================== + + @property + def steps(self): + """List of steps of the pipeline.""" + return list(self._steps) + @property def named_steps(self): """Dictionary-like object, with the following attributes. 
@@ -117,6 +92,35 @@ def named_steps(self): """ return Bunch("steps", dict(self.steps)) + # DUNDERS ================================================================= + + def __len__(self): + """Return the length of the Pipeline.""" + return len(self._steps) + + def __getitem__(self, ind): + """Return a sub-pipeline or a single step in the pipeline. + + Indexing with an integer will return an step; using a slice + returns another Pipeline instance which copies a slice of this + Pipeline. This copy is shallow: modifying steps in the sub-pipeline + will affect the larger pipeline and vice-versa. + However, replacing a value in `step` will not affect a copy. + + """ + if isinstance(ind, slice): + if ind.step not in (1, None): + cname = type(self).__name__ + raise ValueError(f"{cname} slicing only supports a step of 1") + return self.__class__(self.steps[ind]) + elif isinstance(ind, int): + return self.steps[ind][-1] + elif isinstance(ind, str): + return self.named_steps[ind] + raise KeyError(ind) + + # API ===================================================================== + def evaluate(self, dm): """Run the all the transformers and the decision maker. 
@@ -157,34 +161,10 @@ def transform(self, dm): # ============================================================================= -# FUNCTIONS +# FACTORY # ============================================================================= -def _name_steps(steps): - """Generate names for steps.""" - # Based on sklearn.pipeline._name_estimators - - steps = list(reversed(steps)) - - names = [type(step).__name__.lower() for step in steps] - - name_count = {k: v for k, v in Counter(names).items() if v > 1} - - named_steps = [] - for name, step in zip(names, steps): - count = name_count.get(name, 0) - if count: - name_count[name] = count - 1 - name = f"{name}_{count}" - - named_steps.append((name, step)) - - named_steps.reverse() - - return named_steps - - def mkpipe(*steps): """Construct a Pipeline from the given transformers and decision-maker. @@ -201,8 +181,9 @@ def mkpipe(*steps): Returns ------- p : SKCPipeline - Returns a scikit-learn :class:`SKCPipeline` object. + Returns a scikit-criteria :class:`SKCPipeline` object. 
""" - named_steps = _name_steps(steps) + names = [type(step).__name__.lower() for step in steps] + named_steps = unique_names(names=names, elements=steps) return SKCPipeline(named_steps) diff --git a/skcriteria/preprocessing/__init__.py b/skcriteria/preprocessing/__init__.py index 24c1611..0542020 100644 --- a/skcriteria/preprocessing/__init__.py +++ b/skcriteria/preprocessing/__init__.py @@ -10,3 +10,19 @@ # ============================================================================= """Multiple data transformation routines.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +from ._preprocessing_base import ( + SKCMatrixAndWeightTransformerABC, + SKCTransformerABC, +) + + +# ============================================================================= +# ALL +# ============================================================================= + +__all__ = ["SKCTransformerABC", "SKCMatrixAndWeightTransformerABC"] diff --git a/skcriteria/preprocessing/_preprocessing_base.py b/skcriteria/preprocessing/_preprocessing_base.py new file mode 100644 index 0000000..9dc7f74 --- /dev/null +++ b/skcriteria/preprocessing/_preprocessing_base.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. 
+ +# ============================================================================= +# DOCS +# ============================================================================= + +"""Core functionalities to create transformers.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +import abc + +from ..core import DecisionMatrix, SKCMethodABC +from ..utils import doc_inherit + + +# ============================================================================= +# SKCTransformer ABC +# ============================================================================= + + +class SKCTransformerABC(SKCMethodABC): + """Abstract class for all transformer in scikit-criteria.""" + + _skcriteria_dm_type = "transformer" + _skcriteria_abstract_class = True + + @abc.abstractmethod + def _transform_data(self, **kwargs): + """Apply the transformation logic to the decision matrix parameters. + + Parameters + ---------- + kwargs: + The decision matrix as separated parameters. + + Returns + ------- + :py:class:`dict` + A dictionary with all the values of the decision matrix + transformed. + + """ + raise NotImplementedError() + + def transform(self, dm): + """Perform transformation on `dm`. + + Parameters + ---------- + dm: :py:class:`skcriteria.data.DecisionMatrix` + The decision matrix to transform. + + Returns + ------- + :py:class:`skcriteria.data.DecisionMatrix` + Transformed decision matrix. 
+ + """ + data = dm.to_dict() + + transformed_data = self._transform_data(**data) + + transformed_dm = DecisionMatrix.from_mcda_data(**transformed_data) + + return transformed_dm + + +# ============================================================================= +# MATRIX & WEIGHTS TRANSFORMER +# ============================================================================= + + +class SKCMatrixAndWeightTransformerABC(SKCTransformerABC): + """Transform weights and matrix together or independently. + + The Transformer that implements this abstract class can be configured to + transform + `weights`, `matrix` or `both` so only that part of the DecisionMatrix + is altered. + + This abstract class require to redefine ``_transform_weights`` and + ``_transform_matrix``, instead of ``_transform_data``. + + """ + + _skcriteria_abstract_class = True + _skcriteria_parameters = ["target"] + + _TARGET_WEIGHTS = "weights" + _TARGET_MATRIX = "matrix" + _TARGET_BOTH = "both" + + def __init__(self, target): + if target not in ( + self._TARGET_MATRIX, + self._TARGET_WEIGHTS, + self._TARGET_BOTH, + ): + raise ValueError( + f"'target' can only be '{self._TARGET_WEIGHTS}', " + f"'{self._TARGET_MATRIX}' or '{self._TARGET_BOTH}', " + f"found '{target}'" + ) + self._target = target + + @property + def target(self): + """Determine which part of the DecisionMatrix will be transformed.""" + return self._target + + @abc.abstractmethod + def _transform_weights(self, weights): + """Execute the transform method over the weights. + + Parameters + ---------- + weights: :py:class:`numpy.ndarray` + The weights to transform. + + Returns + ------- + :py:class:`numpy.ndarray` + The transformed weights. + + """ + raise NotImplementedError() + + @abc.abstractmethod + def _transform_matrix(self, matrix): + """Execute the transform method over the matrix. 
+ + Parameters + ---------- + matrix: :py:class:`numpy.ndarray` + The decision matrix to transform + + Returns + ------- + :py:class:`numpy.ndarray` + The transformed matrix. + + """ + raise NotImplementedError() + + @doc_inherit(SKCTransformerABC._transform_data) + def _transform_data(self, matrix, weights, **kwargs): + transformed_mtx = matrix + transformed_weights = weights + + if self._target in (self._TARGET_MATRIX, self._TARGET_BOTH): + transformed_mtx = self._transform_matrix(matrix) + + if self._target in (self._TARGET_WEIGHTS, self._TARGET_BOTH): + transformed_weights = self._transform_weights(weights) + + kwargs.update( + matrix=transformed_mtx, weights=transformed_weights, dtypes=None + ) + + return kwargs diff --git a/skcriteria/preprocessing/distance.py b/skcriteria/preprocessing/distance.py index 8ad7aca..254ff43 100644 --- a/skcriteria/preprocessing/distance.py +++ b/skcriteria/preprocessing/distance.py @@ -9,90 +9,43 @@ # DOCS # ============================================================================= -"""Normalization through the distance to distance function.""" +"""Normalization through the distance to distance function. + +This entire module is deprecated. + +""" # ============================================================================= # IMPORTS # ============================================================================= -import numpy as np +from . import scalers +from ..utils import deprecated, doc_inherit -from ..core import Objective, SKCTransformerABC -from ..utils import doc_inherit # ============================================================================= -# FUNCTIONS +# CENIT DISTANCE # ============================================================================= +_skc_prep_scalers = "skcriteria.preprocessing.scalers" -def cenit_distance(matrix, objectives): - r"""Calculate a scores with respect to an ideal and anti-ideal alternative. 
- - For every criterion :math:`f` of this multicriteria problem we define a - membership function :math:`x_j` mapping the values of :math:`f_j` to the - interval [0, 1]. - - The result score :math:`x_{aj}`expresses the degree to which the - alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which - is the best performance in criterion , and far from the anti-ideal value - :math:`f_{j^*}`, which is the worst performance in criterion :math:`j`. - Both ideal and anti-ideal, are achieved by at least one of the alternatives - under consideration. - - .. math:: - - x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}} - - """ - matrix = np.asarray(matrix, dtype=float) - - maxs = np.max(matrix, axis=0) - mins = np.min(matrix, axis=0) - - where_max = np.equal(objectives, Objective.MAX.value) - - cenit = np.where(where_max, maxs, mins) - nadir = np.where(where_max, mins, maxs) - - return (matrix - nadir) / (cenit - nadir) +@deprecated( + reason=( + f"Use ``{_skc_prep_scalers}.matrix_scale_by_cenit_distance`` instead" + ), + version=0.8, +) +@doc_inherit(scalers.matrix_scale_by_cenit_distance) +def cenit_distance(matrix, objectives): + return scalers.matrix_scale_by_cenit_distance(matrix, objectives) -class CenitDistance(SKCTransformerABC): - r"""Relative scores with respect to an ideal and anti-ideal alternative. - - For every criterion :math:`f` of this multicriteria problem we define a - membership function :math:`x_j` mapping the values of :math:`f_j` to the - interval [0, 1]. - - The result score :math:`x_{aj}`expresses the degree to which the - alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which - is the best performance in criterion , and far from the anti-ideal value - :math:`f_{j^*}`, which is the worst performance in criterion :math:`j`. - Both ideal and anti-ideal, are achieved by at least one of the alternatives - under consideration. - - .. 
math:: - - x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}} - - - References - ---------- - :cite:p:`diakoulaki1995determining` - - """ - - _skcriteria_parameters = [] - - @doc_inherit(SKCTransformerABC._transform_data) - def _transform_data(self, matrix, objectives, **kwargs): - - distance_mtx = cenit_distance(matrix, objectives) - - dtypes = np.full(np.shape(objectives), float) - kwargs.update( - matrix=distance_mtx, objectives=objectives, dtypes=dtypes - ) - return kwargs +@deprecated( + reason=f"Use ``{_skc_prep_scalers}.CenitDistanceMatrixScaler`` instead", + version=0.8, +) +@doc_inherit(scalers.CenitDistanceMatrixScaler, warn_class=False) +class CenitDistance(scalers.CenitDistanceMatrixScaler): + ... diff --git a/skcriteria/preprocessing/filters.py b/skcriteria/preprocessing/filters.py index fb418b9..ecf9b3b 100644 --- a/skcriteria/preprocessing/filters.py +++ b/skcriteria/preprocessing/filters.py @@ -21,7 +21,8 @@ import numpy as np -from ..core import DecisionMatrix, SKCTransformerABC +from ._preprocessing_base import SKCTransformerABC +from ..core import DecisionMatrix from ..utils import doc_inherit # ============================================================================= @@ -486,12 +487,12 @@ class FilterNE(SKCArithmeticFilterABC): @doc_inherit(SKCByCriteriaFilterABC, warn_class=False) class SKCSetFilterABC(SKCByCriteriaFilterABC): - """Provide a common behavior to make filters based on set operatopms. + """Provide a common behavior to make filters based on set operations. This abstract class require to redefine ``_set_filter`` method, and this will apply to each criteria separately. - This class is designed to implement in general set comparision like + This class is designed to implement in general set comparison like "inclusion" and "exclusion". 
""" diff --git a/skcriteria/preprocessing/impute.py b/skcriteria/preprocessing/impute.py new file mode 100644 index 0000000..4445365 --- /dev/null +++ b/skcriteria/preprocessing/impute.py @@ -0,0 +1,475 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. + +# ============================================================================= +# DOCS +# ============================================================================= + +"""Module that provides multiple strategies for missing value imputation. + +The classes implemented here are a thin layer on top of the `sklearn.impute` +module classes. + +""" + + +# ============================================================================= +# IMPORTS +# ============================================================================= + + +import abc + +import numpy as np + +import sklearn.impute as _sklimpute + +from ._preprocessing_base import SKCTransformerABC +from ..utils import doc_inherit + +# ============================================================================= +# BASE CLASS +# ============================================================================= + + +class SKCImputerABC(SKCTransformerABC): + """Abstract class capable of impute missing values of the matrix. + + This abstract class require to redefine ``_impute``, instead of + ``_transform_data``. + + """ + + _skcriteria_abstract_class = True + + @abc.abstractmethod + def _impute(self, matrix): + """Impute the missing values. + + Parameters + ---------- + matrix: :py:class:`numpy.ndarray` + The decision matrix to weights. + + Returns + ------- + :py:class:`numpy.ndarray` + The imputed matrix. 
+ + """ + raise NotImplementedError() + + @doc_inherit(SKCTransformerABC._transform_data) + def _transform_data(self, matrix, **kwargs): + imputed_matrix = self._impute(matrix=matrix) + kwargs.update(matrix=imputed_matrix, dtypes=None) + return kwargs + + +# ============================================================================= +# SIMPLE IMPUTER +# ============================================================================= + + +class SimpleImputer(SKCImputerABC): + """Imputation transformer for completing missing values. + + Internally this class uses the ``sklearn.impute.SimpleImputer`` class. + + Parameters + ---------- + missing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan + The placeholder for the missing values. All occurrences of + `missing_values` will be imputed. + + strategy : str, default='mean' + The imputation strategy. + + - If "mean", then replace missing values using the mean along + each column. Can only be used with numeric data. + - If "median", then replace missing values using the median along + each column. Can only be used with numeric data. + - If "most_frequent", then replace missing using the most frequent + value along each column. Can be used with strings or numeric data. + If there is more than one such value, only the smallest is returned. + - If "constant", then replace missing values with fill_value. Can be + used with strings or numeric data. + + fill_value : str or numerical value, default=None + When strategy == "constant", fill_value is used to replace all + occurrences of missing_values. + If left to the default, fill_value will be 0. 
+ + """ + + _skcriteria_parameters = ["missing_values", "strategy", "fill_value"] + + def __init__( + self, + *, + missing_values=np.nan, + strategy="mean", + fill_value=None, + ): + self._missing_values = missing_values + self._strategy = strategy + self._fill_value = fill_value + + # PROPERTIES ============================================================== + + @property + def missing_values(self): + """The placeholder for the missing values.""" + return self._missing_values + + @property + def strategy(self): + """The imputation strategy.""" + return self._strategy + + @property + def fill_value(self): + """Used to replace all occurrences of missing_values, \ + when strategy == "constant".""" + return self._fill_value + + # THE IMPUTATION LOGIC ==================================================== + + @doc_inherit(SKCImputerABC._impute) + def _impute(self, matrix): + imputer = _sklimpute.SimpleImputer( + missing_values=self._missing_values, + strategy=self._strategy, + fill_value=self._fill_value, + ) + imputed_matrix = imputer.fit_transform(matrix) + return imputed_matrix + + +# ============================================================================= +# MULTIVARIATE +# ============================================================================= + + +class IterativeImputer(SKCImputerABC): + """Multivariate imputer that estimates each feature from all the others. + + A strategy for imputing missing values by modeling each feature with + missing values as a function of other features in a round-robin fashion. + + Internally this class uses the ``sklearn.impute.IterativeImputer`` class. + + This estimator is still **experimental** for now: the predictions + and the API might change without any deprecation cycle. 
To use it,
+    you need to explicitly import `enable_iterative_imputer`::
+
+        >>> # explicitly require this experimental feature
+        >>> from sklearn.experimental import enable_iterative_imputer  # noqa
+        >>> # now you can import normally from sklearn.impute
+        >>> from skcriteria.preprocessing.impute import IterativeImputer
+
+    Parameters
+    ----------
+    estimator : estimator object, default=BayesianRidge()
+        The estimator to use at each step of the round-robin imputation.
+        If `sample_posterior=True`, the estimator must support
+        `return_std` in its `predict` method.
+    missing_values : int or np.nan, default=np.nan
+        The placeholder for the missing values. All occurrences of
+        `missing_values` will be imputed.
+    sample_posterior : bool, default=False
+        Whether to sample from the (Gaussian) predictive posterior of the
+        fitted estimator for each imputation. Estimator must support
+        `return_std` in its `predict` method if set to `True`. Set to
+        `True` if using `IterativeImputer` for multiple imputations.
+    max_iter : int, default=10
+        Maximum number of imputation rounds to perform before returning the
+        imputations computed during the final round. A round is a single
+        imputation of each criteria with missing values. The stopping criterion
+        is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,
+        where `X_t` is `X` at iteration `t`. Note that early stopping is only
+        applied if `sample_posterior=False`.
+    tol : float, default=1e-3
+        Tolerance of the stopping condition.
+    n_nearest_criteria : int, default=None
+        Number of other criteria to use to estimate the missing values of
+        each criteria column. Nearness between criteria is measured using
+        the absolute correlation coefficient between each criteria pair (after
+        initial imputation). To ensure coverage of criteria throughout the
+        imputation process, the neighbor criteria are not necessarily nearest,
+        but are drawn with probability proportional to correlation for each
+        imputed target criteria.
Can provide significant speed-up when the + number of criteria is huge. If `None`, all criteria will be used. + initial_strategy : {'mean', 'median', 'most_frequent', 'constant'}, \ + default='mean' + Which strategy to use to initialize the missing values. Same as the + `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`. + imputation_order : {'ascending', 'descending', 'roman', 'arabic', \ + 'random'}, default='ascending' + The order in which the criteria will be imputed. Possible values: + + - `'ascending'`: From criteria with fewest missing values to most. + - `'descending'`: From criteria with most missing values to fewest. + - `'roman'`: Left to right. + - `'arabic'`: Right to left. + - `'random'`: A random order for each round. + + min_value : float or array-like of shape (n_criteria,), default=-np.inf + Minimum possible imputed value. Broadcast to shape `(n_criteria,)` if + scalar. If array-like, expects shape `(n_criteria,)`, one min value for + each criteria. The default is `-np.inf`. + max_value : float or array-like of shape (n_criteria,), default=np.inf + Maximum possible imputed value. Broadcast to shape `(n_criteria,)` if + scalar. If array-like, expects shape `(n_criteria,)`, one max value for + each criteria. The default is `np.inf`. + verbose : int, default=0 + Verbosity flag, controls the debug messages that are issued + as functions are evaluated. The higher, the more verbose. Can be 0, 1, + or 2. + random_state : int, RandomState instance or None, default=None + The seed of the pseudo random number generator to use. Randomizes + selection of estimator criteria if `n_nearest_criteria` is not `None`, + the `imputation_order` if `random`, and the sampling from posterior if + `sample_posterior=True`. Use an integer for determinism. 
+ + """ + + _skcriteria_parameters = [ + "estimator", + "missing_values", + "sample_posterior", + "max_iter", + "tol", + "n_nearest_criteria", + "initial_strategy", + "imputation_order", + "min_value", + "max_value", + "verbose", + "random_state", + ] + + def __init__( + self, + estimator=None, + *, + missing_values=np.nan, + sample_posterior=False, + max_iter=10, + tol=1e-3, + n_nearest_criteria=None, + initial_strategy="mean", + imputation_order="ascending", + skip_complete=False, + min_value=-np.inf, + max_value=np.inf, + verbose=0, + random_state=None, + ): + self._estimator = estimator + self._missing_values = missing_values + self._sample_posterior = sample_posterior + self._max_iter = max_iter + self._tol = tol + self._n_nearest_criteria = n_nearest_criteria + self._initial_strategy = initial_strategy + self._imputation_order = imputation_order + self._skip_complete = skip_complete + self._min_value = min_value + self._max_value = max_value + self._verbose = verbose + self._random_state = random_state + + # PROPERTIES ============================================================== + + @property + def estimator(self): + """Used at each step of the round-robin imputation.""" + return self._estimator + + @property + def missing_values(self): + """The placeholder for the missing values.""" + return self._missing_values + + @property + def sample_posterior(self): + """Whether to sample from the (Gaussian) predictive posterior of the \ + fitted estimator for each imputation.""" + return self._sample_posterior + + @property + def max_iter(self): + """Maximum number of imputation rounds.""" + return self._max_iter + + @property + def tol(self): + """Tolerance of the stopping condition.""" + return self._tol + + @property + def n_nearest_criteria(self): + """Number of other criteria to use to estimate the missing values of \ + each criteria column.""" + return self._n_nearest_criteria + + @property + def initial_strategy(self): + """Which strategy to use to initialize 
the missing values.""" + return self._initial_strategy + + @property + def imputation_order(self): + """The order in which the criteria will be imputed.""" + return self._imputation_order + + @property + def min_value(self): + """Minimum possible imputed value.""" + return self._min_value + + @property + def max_value(self): + """Maximum possible imputed value.""" + return self._max_value + + @property + def verbose(self): + """Verbosity flag, controls the debug messages that are issued as \ + functions are evaluated.""" + return self._verbose + + @property + def random_state(self): + """The seed of the pseudo random number generator to use.""" + return self._random_state + + # THE IMPUTATION LOGIC ==================================================== + + @doc_inherit(SKCImputerABC._impute) + def _impute(self, matrix): + + imputer = _sklimpute.IterativeImputer( + estimator=self._estimator, + missing_values=self._missing_values, + sample_posterior=self._sample_posterior, + max_iter=self._max_iter, + tol=self._tol, + n_nearest_features=self._n_nearest_criteria, + initial_strategy=self._initial_strategy, + imputation_order=self._imputation_order, + skip_complete=False, # is + min_value=self._min_value, + max_value=self._max_value, + verbose=self._verbose, + random_state=self._random_state, + ) + imputed_matrix = imputer.fit_transform(matrix) + return imputed_matrix + + +# ============================================================================= +# KNN +# ============================================================================= + + +class KNNImputer(SKCImputerABC): + """Imputation for completing missing values using k-Nearest Neighbors. + + Internally this class uses the ``sklearn.impute.KNNImputer`` class. + + Each sample's missing values are imputed using the mean value from + `n_neighbors` nearest neighbors found in the training set. + Two samples are close if the criteria that neither is missing are close. 
+ + Parameters + ---------- + missing_values : int, float, str, np.nan or None, default=np.nan + The placeholder for the missing values. All occurrences of + `missing_values` will be imputed. + + n_neighbors : int, default=5 + Number of neighboring samples to use for imputation. + + weights : {'uniform', 'distance'} or callable, default='uniform' + Weight function used in prediction. Possible values: + + - `'uniform'`: uniform weights. All points in each neighborhood are + weighted equally. + - `'distance'`: weight points by the inverse of their distance. + in this case, closer neighbors of a query point will have a + greater influence than neighbors which are further away. + - callable: a user-defined function which accepts an + array of distances, and returns an array of the same shape + containing the weights. + + metric : {'nan_euclidean'} or callable, default='nan_euclidean' + Distance metric for searching neighbors. Possible values: + + - 'nan_euclidean' + - callable : a user-defined function which conforms to the definition + of ``_pairwise_callable(X, Y, metric, **kwds)``. The function + accepts two arrays, X and Y, and a `missing_values` keyword in + `kwds` and returns a scalar distance value. 
+ + """ + + _skcriteria_parameters = [ + "missing_values", + "n_neighbors", + "weights", + "metric", + ] + + def __init__( + self, + *, + missing_values=np.nan, + n_neighbors=5, + weights="uniform", + metric="nan_euclidean", + ): + self._missing_values = missing_values + self._n_neighbors = n_neighbors + self._weights = weights + self._metric = metric + + # PROPERTIES ============================================================== + + @property + def missing_values(self): + """The placeholder for the missing values.""" + return self._missing_values + + @property + def n_neighbors(self): + """Number of neighboring samples to use for imputation.""" + return self._n_neighbors + + @property + def weights(self): + """Weight function used in prediction.""" + return self._weights + + @property + def metric(self): + """Distance metric for searching neighbors.""" + return self._metric + + # THE IMPUTATION LOGIC ==================================================== + + @doc_inherit(SKCImputerABC._impute) + def _impute(self, matrix): + imputer = _sklimpute.KNNImputer( + missing_values=self._missing_values, + n_neighbors=self._n_neighbors, + weights=self._weights, + metric=self._metric, + ) + imputed_matrix = imputer.fit_transform(matrix) + return imputed_matrix diff --git a/skcriteria/preprocessing/increment.py b/skcriteria/preprocessing/increment.py index d3410a8..ce1b949 100644 --- a/skcriteria/preprocessing/increment.py +++ b/skcriteria/preprocessing/increment.py @@ -24,7 +24,7 @@ import numpy as np -from ..core import SKCMatrixAndWeightTransformerABC +from ._preprocessing_base import SKCMatrixAndWeightTransformerABC from ..utils import doc_inherit # ============================================================================= diff --git a/skcriteria/preprocessing/invert_objectives.py b/skcriteria/preprocessing/invert_objectives.py index cd6585d..30eeee5 100644 --- a/skcriteria/preprocessing/invert_objectives.py +++ b/skcriteria/preprocessing/invert_objectives.py @@ -19,7 
+19,8 @@ import numpy as np -from ..core import Objective, SKCTransformerABC +from ._preprocessing_base import SKCTransformerABC +from ..core import Objective from ..utils import deprecated, doc_inherit @@ -133,7 +134,10 @@ def _invert(self, matrix, minimize_mask): # DEPRECATED # ============================================================================= @deprecated( - reason="Use 'skcriteria.preprocessing.InvertMinimize' instead", + reason=( + "Use ``skcriteria.preprocessing.invert_objectives.InvertMinimize`` " + "instead" + ), version=0.7, ) class MinimizeToMaximize(InvertMinimize): diff --git a/skcriteria/preprocessing/push_negatives.py b/skcriteria/preprocessing/push_negatives.py index 763a8fd..81ecb27 100644 --- a/skcriteria/preprocessing/push_negatives.py +++ b/skcriteria/preprocessing/push_negatives.py @@ -22,7 +22,7 @@ import numpy as np -from ..core import SKCMatrixAndWeightTransformerABC +from ._preprocessing_base import SKCMatrixAndWeightTransformerABC from ..utils import doc_inherit # ============================================================================= diff --git a/skcriteria/preprocessing/scalers.py b/skcriteria/preprocessing/scalers.py index 03e6c4f..124c115 100644 --- a/skcriteria/preprocessing/scalers.py +++ b/skcriteria/preprocessing/scalers.py @@ -9,7 +9,7 @@ # DOCS # ============================================================================= -"""Functionalities for scale values based on differrent strategies. +"""Functionalities for scale values based on different strategies. In addition to the Transformers, a collection of an MCDA agnostic functions are offered to scale an array along an arbitrary axis. 
@@ -25,83 +25,217 @@ import numpy as np from numpy import linalg -from ..core import SKCMatrixAndWeightTransformerABC -from ..utils import doc_inherit +from sklearn import preprocessing as _sklpreproc + +from ._preprocessing_base import ( + SKCMatrixAndWeightTransformerABC, + SKCTransformerABC, +) +from ..core import Objective +from ..utils import deprecated, doc_inherit + + +# ============================================================================= +# HELPER FUNCTION +# ============================================================================= + + +def _run_sklearn_scaler(mtx_or_weights, scaler): + """Runs sklearn scalers against 1D (weights) or 2D (alternatives) \ + arrays. + + This function is in charge of verifying if the array provided has adequate + dimensions to work with the scikit-learn scalers. + + It also ensures that the output has the same input dimensions. + + """ + ndims = np.ndim(mtx_or_weights) + if ndims == 1: # is a weights + mtx_or_weights = mtx_or_weights.reshape(len(mtx_or_weights), 1) + result = scaler.fit_transform(mtx_or_weights) + if ndims == 1: + result = result.flatten() + return result + # ============================================================================= # STANDAR SCALER # ============================================================================= -def scale_by_stdscore(arr, axis=None): - r"""Standardize the values by removing the mean and divided by the std-dev. +class StandarScaler(SKCMatrixAndWeightTransformerABC): + """Standardize the dm by removing the mean and scaling to unit variance. The standard score of a sample `x` is calculated as: - .. math:: + z = (x - u) / s + + where `u` is the mean of the values, and `s` is the standard deviation + of the training samples or one if `with_std=False`. - z = (x - \mu) / \sigma + This is a thin wrapper around ``sklearn.preprocessing.StandarScaler``. Parameters ---------- - arr: :py:class:`numpy.ndarray` like. 
- A array with values - axis : :py:class:`int` optional - Axis along which to operate. By default, flattened input is used. + with_mean : bool, default=True + If True, center the data before scaling. - Returns - ------- - :py:class:`numpy.ndarray` - array of ratios + with_std : bool, default=True + If True, scale the data to unit variance (or equivalently, unit + standard deviation). - Examples - -------- - .. code-block:: pycon + """ - >>> from skcriteria.preprocess import scale_by_stdscore - >>> mtx = [[1, 2], [3, 4]] + _skcriteria_parameters = ["target", "with_mean", "with_std"] + + def __init__(self, target, *, with_mean=True, with_std=True): + super().__init__(target) + self._with_mean = bool(with_mean) + self._with_std = bool(with_std) + + @property + def with_mean(self): + """True if the features will be center before scaling.""" + return self._with_mean + + @property + def with_std(self): + """True if the features will be scaled to the unit variance.""" + return self._with_std + + def _get_scaler(self): + return _sklpreproc.StandardScaler( + with_mean=self.with_mean, + with_std=self.with_std, + ) + + @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights) + def _transform_weights(self, weights): + scaler = self._get_scaler() + return _run_sklearn_scaler(weights, scaler) + + @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix) + def _transform_matrix(self, matrix): + scaler = self._get_scaler() + return _run_sklearn_scaler(matrix, scaler) - # ratios with the max value of the array - >>> scale_by_stdscore(mtx) - array([[-1.34164079, -0.4472136 ], - [ 0.4472136 , 1.34164079]]) - # ratios with the max value of the arr by column - >>> scale_by_stdscore(mtx, axis=0) - array([[-1., -1.], - [ 1., 1.]]) +# ============================================================================= +# MINMAX +# ============================================================================= - # ratios with the max value of the array by row - >>> 
scale_by_stdscore(mtx, axis=1) - array([[-1., 1.], - [-1., 1.]]) + +class MinMaxScaler(SKCMatrixAndWeightTransformerABC): + r"""Scaler based on the range. + + The matrix transformation is given by:: + + X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0)) + X_scaled = X_std * (max - min) + min + + And the weight transformation:: + + X_std = (X - X.min(axis=None)) / (X.max(axis=None) - X.min(axis=None)) + X_scaled = X_std * (max - min) + min + + If the scaler is configured to work with 'matrix' each value + of each criteria is divided by the range of that criteria. + In other hand if is configure to work with 'weights', + each value of weight is divided by the range the weights. + + This is a thin wrapper around ``sklearn.preprocessing.MinMaxScaler``. + + Parameters + ---------- + criteria_range : tuple (min, max), default=(0, 1) + Desired range of transformed data. + + clip : bool, default=False + Set to True to clip transformed values of held-out data to + provided `criteria_range`. """ - arr = np.asarray(arr, dtype=float) - mean = np.mean(arr, axis=axis, keepdims=True) - std = np.std(arr, axis=axis, keepdims=True) - return (arr - mean) / std + _skcriteria_parameters = ["target", "clip", "criteria_range"] -class StandarScaler(SKCMatrixAndWeightTransformerABC): - """Standardize the dm by removing the mean and scaling to unit variance. 
+ def __init__(self, target, *, clip=False, criteria_range=(0, 1)): + super().__init__(target) + self._clip = bool(clip) + self._cr_min, self._cr_max = map(float, criteria_range) - The standard score of a sample `x` is calculated as: + @property + def clip(self): + """True if the transformed values will be clipped to held-out the \ + value provided `criteria_range`.""" + return self._clip - z = (x - u) / s + @property + def criteria_range(self): + """Range of transformed data.""" + return (self._cr_min, self._cr_max) - where `u` is the mean of the values, and `s` is the standard deviation - of the training samples or one if `with_std=False`. + def _get_scaler(self): + return _sklpreproc.MinMaxScaler( + clip=self.clip, + feature_range=self.criteria_range, + ) + + @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights) + def _transform_weights(self, weights): + scaler = self._get_scaler() + return _run_sklearn_scaler(weights, scaler) + + @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix) + def _transform_matrix(self, matrix): + scaler = self._get_scaler() + return _run_sklearn_scaler(matrix, scaler) + + +# ============================================================================= +# MAX +# ============================================================================= + + +class MaxAbsScaler(SKCMatrixAndWeightTransformerABC): + r"""Scaler based on the maximum values. + + If the scaler is configured to work with 'matrix' each value + of each criteria is divided by the maximum value of that criteria. + In other hand if is configure to work with 'weights', + each value of weight is divided by the maximum value the weights. + + This estimator scales and translates each criteria individually such that + the maximal absolute value of each criteria in the training set will be + 1.0. It does not shift/center the data, and thus does not destroy any + sparsity. + + This is a thin wrapper around ``sklearn.preprocessing.MaxAbsScaler``. 
""" @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights) def _transform_weights(self, weights): - return scale_by_stdscore(weights, axis=None) + scaler = _sklpreproc.MaxAbsScaler() + return _run_sklearn_scaler(weights, scaler) @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix) def _transform_matrix(self, matrix): - return scale_by_stdscore(matrix, axis=0) + scaler = _sklpreproc.MaxAbsScaler() + return _run_sklearn_scaler(matrix, scaler) + + +@deprecated( + reason="Use ``skcriteria.preprocessing.scalers.MaxAbsScaler`` instead", + version=0.8, +) +class MaxScaler(MaxAbsScaler): + r"""Scaler based on the maximum values. + + From skcriteria >= 0.8 this is a thin wrapper around + ``sklearn.preprocessing.MaxAbsScaler``. + + """ # ============================================================================= @@ -187,88 +321,6 @@ def _transform_matrix(self, matrix): return scale_by_vector(matrix, axis=0) -# ============================================================================= -# MINMAX -# ============================================================================= - - -def scale_by_minmax(arr, axis=None): - r"""Fraction of the range normalizer. - - Subtracts to each value of the array the minimum and then divides - it by the total range. - - .. math:: - - \overline{X}_{ij} = - \frac{X_{ij} - \min{X_{ij}}}{\max_{X_{ij}} - \min_{X_{ij}}} - - Parameters - ---------- - arr: :py:class:`numpy.ndarray` like. - A array with values - axis : :py:class:`int` optional - Axis along which to operate. By default, flattened input is used. - - Returns - ------- - :py:class:`numpy.ndarray` - array of ratios - - - Examples - -------- - .. code-block:: pycon - - >>> from skcriteria.preprocess import scale_by_minmax - >>> mtx = [[1, 2], [3, 4]] - - # ratios with the range of the array - >>> scale_by_minmax(mtx) - array([[0. , 0.33333333], - [0.66666667, 1. 
]]) - - # ratios with the range by column - >>> scale_by_minmax(mtx, axis=0) - array([[0., 0.], - [1., 1.]]) - - # ratios with the range by row - >>> scale_by_minmax(mtx, axis=1) - array([[0., 1.], - [0., 1.]]) - - """ - arr = np.asarray(arr, dtype=float) - minval = np.min(arr, axis=axis, keepdims=True) - maxval = np.max(arr, axis=axis, keepdims=True) - return (arr - minval) / (maxval - minval) - - -class MinMaxScaler(SKCMatrixAndWeightTransformerABC): - r"""Scaler based on the range. - - .. math:: - - \overline{X}_{ij} = - \frac{X_{ij} - \min{X_{ij}}}{\max_{X_{ij}} - \min_{X_{ij}}} - - If the scaler is configured to work with 'matrix' each value - of each criteria is divided by the range of that criteria. - In other hand if is configure to work with 'weights', - each value of weight is divided by the range the weights. - - """ - - @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights) - def _transform_weights(self, weights): - return scale_by_minmax(weights, axis=None) - - @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix) - def _transform_matrix(self, matrix): - return scale_by_minmax(matrix, axis=0) - - # ============================================================================= # SUM # ============================================================================= @@ -345,75 +397,77 @@ def _transform_matrix(self, matrix): # ============================================================================= -# MAX +# CENIT DISTANCE # ============================================================================= -def scale_by_max(arr, axis=None): - r"""Divide of every value on the array by max value along an axis. +def matrix_scale_by_cenit_distance(matrix, objectives): + r"""Calculate a scores with respect to an ideal and anti-ideal alternative. - .. math:: + For every criterion :math:`f` of this multicriteria problem we define a + membership function :math:`x_j` mapping the values of :math:`f_j` to the + interval [0, 1]. 
-        \overline{X}_{ij} = \frac{X_{ij}}{\max_{X_{ij}}}
+    The result score :math:`x_{aj}` expresses the degree to which the
+    alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which
+    is the best performance in criterion :math:`j`, and far from the
+    anti-ideal value
+    :math:`f_{j^*}`, which is the worst performance in criterion :math:`j`.
+    Both ideal and anti-ideal are achieved by at least one of the alternatives
+    under consideration.
-    Parameters
-    ----------
-    arr: :py:class:`numpy.ndarray` like.
-        A array with values
-    axis : :py:class:`int` optional
-        Axis along which to operate. By default, flattened input is used.
+    .. math::
-    Returns
-    -------
-    :py:class:`numpy.ndarray`
-        array of ratios
+        x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}}
-    Examples
-    --------
-    .. code-block:: pycon
+    """
+    matrix = np.asarray(matrix, dtype=float)
-        >>> from skcriteria.preprocess import scale_by_max
-        >>> mtx = [[1, 2], [3, 4]]
+    maxs = np.max(matrix, axis=0)
+    mins = np.min(matrix, axis=0)
-        # ratios with the max value of the array
-        >>> scale_by_max(mtx)
-        array([[ 0.25,  0.5 ],
-               [ 0.75,  1.  ]])
+    where_max = np.equal(objectives, Objective.MAX.value)
-        # ratios with the max value of the arr by column
-        >>> scale_by_max(mtx, axis=0)
-        array([[ 0.33333334,  0.5],
-               [ 1.        ,  1. ]])
+    cenit = np.where(where_max, maxs, mins)
+    nadir = np.where(where_max, mins, maxs)
-        # ratios with the max value of the array by row
-        >>> scale_by_max(mtx, axis=1)
-        array([[ 0.5 ,  1.],
-               [ 0.75,  1.]])
+    return (matrix - nadir) / (cenit - nadir)
-    """
-    arr = np.asarray(arr, dtype=float)
-    maxval = np.max(arr, axis=axis, keepdims=True)
-    return arr / maxval
+class CenitDistanceMatrixScaler(SKCTransformerABC):
-class MaxScaler(SKCMatrixAndWeightTransformerABC):
-    r"""Scaler based on the maximum values.
+ For every criterion :math:`f` of this multicriteria problem we define a + membership function :math:`x_j` mapping the values of :math:`f_j` to the + interval [0, 1]. + + The result score :math:`x_{aj}`expresses the degree to which the + alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which + is the best performance in criterion , and far from the anti-ideal value + :math:`f_{j^*}`, which is the worst performance in criterion :math:`j`. + Both ideal and anti-ideal, are achieved by at least one of the alternatives + under consideration. .. math:: - \overline{X}_{ij} = \frac{X_{ij}}{\max_{X_{ij}}} + x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}} - If the scaler is configured to work with 'matrix' each value - of each criteria is divided by the maximum value of that criteria. - In other hand if is configure to work with 'weights', - each value of weight is divided by the maximum value the weights. + + References + ---------- + :cite:p:`diakoulaki1995determining` """ - @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights) - def _transform_weights(self, weights): - return scale_by_max(weights, axis=None) + _skcriteria_parameters = [] - @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix) - def _transform_matrix(self, matrix): - return scale_by_max(matrix, axis=0) + @doc_inherit(SKCTransformerABC._transform_data) + def _transform_data(self, matrix, objectives, **kwargs): + + distance_mtx = matrix_scale_by_cenit_distance(matrix, objectives) + + dtypes = np.full(np.shape(objectives), float) + + kwargs.update( + matrix=distance_mtx, objectives=objectives, dtypes=dtypes + ) + return kwargs diff --git a/skcriteria/preprocessing/weighters.py b/skcriteria/preprocessing/weighters.py index a280a9d..0c4371d 100644 --- a/skcriteria/preprocessing/weighters.py +++ b/skcriteria/preprocessing/weighters.py @@ -28,10 +28,14 @@ import scipy.stats +from ._preprocessing_base import SKCTransformerABC +from .scalers import 
matrix_scale_by_cenit_distance
+from ..core import Objective
+from ..utils import deprecated, doc_inherit
-from .distance import cenit_distance
-from ..core import Objective, SKCTransformerABC
-from ..utils import doc_inherit
+# =============================================================================
+# BASE CLASS
+# =============================================================================
 class SKCWeighterABC(SKCTransformerABC):
@@ -159,7 +163,7 @@ def std_weights(matrix):
     .. math::
-        w_j = \frac{base\_value}{m}
+        w_j = \frac{s_j}{m}
     Where $m$ is the number os columns/criteria in matrix.
@@ -184,7 +188,7 @@ def std_weights(matrix):
         array([0.5, 0.5])
     """
-    std = np.std(matrix, axis=0)
+    std = np.std(matrix, axis=0, ddof=1)
     return std / np.sum(std)
@@ -235,7 +239,7 @@ class EntropyWeighter(SKCWeighterABC):
     It uses the underlying ``scipy.stats.entropy`` function which assumes
     that the values of the criteria are probabilities of a distribution.
-    This transformer will normalize the criteria if they don’t sum to 1.
+    This transformer will normalize the criteria if they don't sum to 1.
     See Also
     --------
@@ -256,6 +260,10 @@ def _weight_matrix(self, matrix, **kwargs):
 # =============================================================================
+@deprecated(
+    reason="Please use ``pd.DataFrame(arr.T).corr(method='pearson')``",
+    version=0.8,
+)
 def pearson_correlation(arr):
     """Return Pearson product-moment correlation coefficients.
@@ -282,6 +290,10 @@ def pearson_correlation(arr):
     return np.corrcoef(arr)
+@deprecated(
+    reason="Please use ``pd.DataFrame(arr.T).corr(method='spearman')``",
+    version=0.8,
+)
 def spearman_correlation(arr):
     """Calculate a Spearman correlation coefficient.
@@ -308,22 +320,25 @@ def spearman_correlation(arr): return scipy.stats.spearmanr(arr.T, axis=0).correlation -def critic_weights( - matrix, objectives, correlation=pearson_correlation, scale=True -): +def critic_weights(matrix, objectives, correlation="pearson", scale=True): """Execute the CRITIC method without any validation.""" matrix = np.asarray(matrix, dtype=float) - matrix = cenit_distance(matrix, objectives=objectives) if scale else matrix + matrix = ( + matrix_scale_by_cenit_distance(matrix, objectives=objectives) + if scale + else matrix + ) dindex = np.std(matrix, axis=0) + import pandas as pd - corr_m1 = 1 - correlation(matrix.T) + corr_m1 = 1 - pd.DataFrame(matrix).corr(method=correlation).to_numpy() uweights = dindex * np.sum(corr_m1, axis=0) weights = uweights / np.sum(uweights) return weights -class Critic(SKCWeighterABC): +class CRITIC(SKCWeighterABC): """CRITIC (CRiteria Importance Through Intercriteria Correlation). The method aims at the determination of objective weights of relative @@ -333,19 +348,21 @@ class Critic(SKCWeighterABC): Parameters ---------- - correlation: str ["pearson" or "spearman"] or callable. (default "pearson") + correlation: str ["pearson", "spearman", "kendall"] or callable. This is the correlation function used to evaluate the discordance between two criteria. In other words, what conflict does one criterion a criterion with respect to the decision made by the other criteria. - By default the ``pearson`` correlation is used, and the ``kendall`` - correlation is also available implemented. - It is also possible to provide a function that receives as a single - parameter, the matrix of alternatives, and returns the correlation - matrix. + By default the ``pearson`` correlation is used, and the ``spearman`` + and ``kendall`` correlation is also available implemented. + It is also possible to provide a callable with input two 1d arrays + and returning a float. 
Note that the returned matrix from corr will + have 1 along the diagonals and will be symmetric regardless of the + callable's behavior + scale: bool (default ``True``) True if it is necessary to scale the data with - ``skcriteria.preprocesisng.cenit_distance`` prior to calculating the - correlation + ``skcriteria.preprocessing.matrix_scale_by_cenit_distance`` prior + to calculating the correlation Warnings -------- @@ -360,19 +377,15 @@ class Critic(SKCWeighterABC): """ - CORRELATION = { - "pearson": pearson_correlation, - "spearman": spearman_correlation, - } + CORRELATION = ("pearson", "spearman", "kendall") _skcriteria_parameters = ["correlation", "scale"] def __init__(self, correlation="pearson", scale=True): - correlation_func = self.CORRELATION.get(correlation, correlation) - if not callable(correlation_func): + if not (correlation in self.CORRELATION or callable(correlation)): corr_keys = ", ".join(f"'{c}'" for c in self.CORRELATION) - raise ValueError(f"Correlation must be {corr_keys} or callable") - self._correlation = correlation_func + raise ValueError(f"Correlation must be {corr_keys} or a callable") + self._correlation = correlation self._scale = bool(scale) @@ -398,3 +411,12 @@ def _weight_matrix(self, matrix, objectives, **kwargs): return critic_weights( matrix, objectives, correlation=self.correlation, scale=self.scale ) + + +@deprecated( + reason="Use ``skcriteria.preprocessing.weighters.CRITIC`` instead", + version=0.8, +) +@doc_inherit(CRITIC, warn_class=False) +class Critic(CRITIC): + ... diff --git a/skcriteria/utils/__init__.py b/skcriteria/utils/__init__.py index 5d955a9..83575d6 100644 --- a/skcriteria/utils/__init__.py +++ b/skcriteria/utils/__init__.py @@ -18,7 +18,11 @@ from . 
import lp, rank from .accabc import AccessorABC from .bunch import Bunch -from .decorators import deprecated, doc_inherit +from .cmanagers import df_temporal_header +from .deprecate import deprecated, will_change +from .doctools import doc_inherit +from .unames import unique_names + # ============================================================================= # ALL @@ -28,8 +32,10 @@ "AccessorABC", "doc_inherit", "deprecated", + "df_temporal_header", "rank", "Bunch", "lp", - "dominance", + "unique_names", + "will_change", ] diff --git a/skcriteria/utils/bunch.py b/skcriteria/utils/bunch.py index 8396ba1..62a7ba1 100644 --- a/skcriteria/utils/bunch.py +++ b/skcriteria/utils/bunch.py @@ -16,8 +16,10 @@ # IMPORTS # ============================================================================= +import copy from collections.abc import Mapping + # ============================================================================= # DOC INHERITANCE # ============================================================================= @@ -61,10 +63,32 @@ def __getitem__(self, k): def __getattr__(self, a): """x.__getattr__(y) <==> x.y.""" try: - return self[a] + return self._data[a] except KeyError: raise AttributeError(a) + def __copy__(self): + """x.__copy__() <==> copy.copy(x).""" + cls = type(self) + return cls(str(self._name), data=self._data) + + def __deepcopy__(self, memo): + """x.__deepcopy__() <==> copy.copy(x).""" + # extract the class + cls = type(self) + + # make the copy but without the data + clone = cls(name=str(self._name), data=None) + + # store in the memo that clone is copy of self + # https://docs.python.org/3/library/copy.html + memo[id(self)] = clone + + # now we copy the data + clone._data = copy.deepcopy(self._data, memo) + + return clone + def __iter__(self): """x.__iter__() <==> iter(x).""" return iter(self._data) @@ -76,7 +100,7 @@ def __len__(self): def __repr__(self): """x.__repr__() <==> repr(x).""" content = repr(set(self._data)) if self._data else "{}" 
- return f"{self._name}({content})" + return f"<{self._name} {content}>" def __dir__(self): """x.__dir__() <==> dir(x).""" diff --git a/skcriteria/utils/cmanagers.py b/skcriteria/utils/cmanagers.py new file mode 100644 index 0000000..7a1684d --- /dev/null +++ b/skcriteria/utils/cmanagers.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. + +# ============================================================================= +# DOCS +# ============================================================================= + +"""Multiple context managers to use inside scikit-criteria.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +import contextlib + +# ============================================================================= +# FUNCTIONS +# ============================================================================= + + +@contextlib.contextmanager +def df_temporal_header(df, header, name=None): + """Temporarily replaces a DataFrame columns names. + + Optionally also assign another name to the columns. + + Parameters + ---------- + header : sequence + The new names of the columns. + name : str or None (default None) + New name for the index containing the columns in the DataFrame. If + 'None' the original name of the columns present in the DataFrame is + preserved. 
+ + """ + original_header = df.columns + original_name = original_header.name + + name = original_name if name is None else name + try: + df.columns = header + df.columns.name = name + yield df + finally: + df.columns = original_header + df.columns.name = original_name diff --git a/skcriteria/utils/deprecate.py b/skcriteria/utils/deprecate.py new file mode 100644 index 0000000..28c0561 --- /dev/null +++ b/skcriteria/utils/deprecate.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. + +# ============================================================================= +# DOCS +# ============================================================================= + +"""Multiple decorator to use inside scikit-criteria.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +from deprecated import deprecated as _deprecated + + +# ============================================================================= +# CONSTANTS +# ============================================================================= + +# _ If the version of the warning is >= ERROR_GE the action is setted to +# 'error', otherwise is 'once'. 
+ERROR_GE = 1.0 + +# ============================================================================= +# WARNINGS +# ============================================================================= + + +class SKCriteriaDeprecationWarning(DeprecationWarning): + """Skcriteria deprecation warning.""" + + +class SKCriteriaFutureWarning(FutureWarning): + """Skcriteria future warning.""" + + +# ============================================================================= +# FUNCTIONS +# ============================================================================= + +DEPRECATION_DIRECTIVE = """ +{indent}.. deprecated:: {version} +{indent} {reason} +""" + + +def _create_doc_with_deprecated_directive(text, *, reason, version): + # first let split the text in lines + lines = text.splitlines() + + # the location is where in between lines we must insert the + # deprecation directive. By default "at the end" + location = len(lines) + + # indentation is how much away from the margin (in number os spaces) we + # must insert the directive. By default n indentation is required + indentation = "" + + # we iterate line by line + for idx, line in enumerate(lines): + + line_stripped = line.strip() + + # if we found a line full of "-" is a underline of the first section + # in numpy format. 
+ # check: https://numpydoc.readthedocs.io/en/latest/format.html + if line_stripped and line_stripped.replace("-", "") == "": + + # the the location of the directive is one line above the first + # section + location = idx - 2 + + # and the indentation is the number os white spaces on the left + indentation = " " * (len(line) - len(line.lstrip())) + + break + + # we create the directive here + directive = DEPRECATION_DIRECTIVE.format( + reason=reason, version=version, indent=indentation + ) + + # we insert the directive in the correct location + lines.insert(location, directive) + + # recreate the doct with the directive + new_doc = "\n".join(lines) + return new_doc + + +# ============================================================================= +# DECORATORS +# ============================================================================= + + +def deprecated(*, reason, version): + """Mark functions, classes and methods as deprecated. + + It will result in a warning being emitted when the object is called, + and the "deprecated" directive was added to the docstring. + + Parameters + ---------- + reason: str + Reason message which documents the deprecation in your library. + version: str + Version of your project which deprecates this feature. + If you follow the `Semantic Versioning `_, + the version number has the format "MAJOR.MINOR.PATCH". + + Notes + ----- + This decorator is a thin layer over + :py:func:`deprecated.deprecated`. + + Check: __ + + """ + add_warning = _deprecated( + reason=reason, + version=version, + category=SKCriteriaDeprecationWarning, + action=("error" if version >= ERROR_GE else "once"), + ) + + def _dec(func): + decorated_func = add_warning(func) + decorated_func.__doc__ = _create_doc_with_deprecated_directive( + func.__doc__, reason=reason, version=version + ) + return decorated_func + + return _dec + + +def will_change(*, reason, version): + """Mark functions, classes and methods as "to be changed". 
+ + It will result in a warning being emitted when the object is called, + and the "deprecated" directive was added to the docstring. + + Parameters + ---------- + reason: str + Reason message which documents the "to be changed" in your library. + version: str + Version of your project which marks as this feature. + If you follow the `Semantic Versioning `_, + the version number has the format "MAJOR.MINOR.PATCH". + + Notes + ----- + This decorator is a thin layer over + :py:func:`deprecated.deprecated`. + + Check: __ + + """ + add_warning = _deprecated( + reason=reason, + version=version, + category=SKCriteriaFutureWarning, + action="once", + ) + + def _dec(func): + decorated_func = add_warning(func) + decorated_func.__doc__ = _create_doc_with_deprecated_directive( + func.__doc__, reason=reason, version=version + ) + return decorated_func + + return _dec diff --git a/skcriteria/utils/decorators.py b/skcriteria/utils/doctools.py similarity index 59% rename from skcriteria/utils/decorators.py rename to skcriteria/utils/doctools.py index 27db58a..78b4981 100644 --- a/skcriteria/utils/decorators.py +++ b/skcriteria/utils/doctools.py @@ -20,8 +20,6 @@ from custom_inherit import doc_inherit as _doc_inherit -from deprecated import deprecated as _deprecated - # ============================================================================= # DOC INHERITANCE # ============================================================================= @@ -39,7 +37,7 @@ def doc_inherit(parent, warn_class=True): parent : Union[str, Any] The docstring, or object of which the docstring is utilized as the parent docstring during the docstring merge. - warn_class: bool + warn_class : bool If it is true, and the decorated is a class, it throws a warning since there are some issues with inheritance of documentation in classes. 
@@ -64,48 +62,3 @@ def _wrapper(obj): return dec(obj) return _wrapper - - -# ============================================================================= -# Deprecation -# ============================================================================= - - -class SKCriteriaDeprecationWarning(DeprecationWarning): - """Skcriteria deprecation warning.""" - - -# _ If the version of the warning is >= ERROR_GE the action is setted to -# 'error', otherwise is 'once'. -ERROR_GE = 1.0 - - -def deprecated(*, reason, version): - """Mark functions, classes and methods as deprecated. - - It will result in a warning being emitted when the object is called, - and the "deprecated" directive was added to the docstring. - - Parameters - ---------- - reason: str - Reason message which documents the deprecation in your library. - version: str - Version of your project which deprecates this feature. - If you follow the `Semantic Versioning `_, - the version number has the format "MAJOR.MINOR.PATCH". - - Notes - ----- - This decorator is a thin layer over - :py:func:`deprecated.deprecated`. 
- - Check: __ - - """ - return _deprecated( - reason=reason, - version=version, - category=SKCriteriaDeprecationWarning, - action=("error" if version >= ERROR_GE else "once"), - ) diff --git a/skcriteria/utils/lp.py b/skcriteria/utils/lp.py index a0ea501..9b10de2 100644 --- a/skcriteria/utils/lp.py +++ b/skcriteria/utils/lp.py @@ -26,7 +26,7 @@ import pulp from .bunch import Bunch -from .decorators import doc_inherit +from .doctools import doc_inherit # ============================================================================= diff --git a/skcriteria/utils/unames.py b/skcriteria/utils/unames.py new file mode 100644 index 0000000..a7bc12f --- /dev/null +++ b/skcriteria/utils/unames.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. + +# ============================================================================= +# DOCS +# ============================================================================= + +"""Utility to achieve unique names for a collection of objects.""" + +# ============================================================================= +# IMPORTS +# ============================================================================= + +from collections import Counter + + +# ============================================================================= +# FUNCTIONS +# ============================================================================= + + +def unique_names(*, names, elements): + """Generate names unique name. + + Parameters + ---------- + elements: iterable of size n + objects to be named + names: iterable of size n + names candidates + + Returns + ------- + list of tuples: + Returns a list where each element is a tuple. + Each tuple contains two elements: The first is the unique name of + the second. 
+ + """ + # Based on sklearn.pipeline._name_estimators + if len(names) != len(elements): + raise ValueError("'names' and 'elements' must has same length") + + names = list(reversed(names)) + elements = list(reversed(elements)) + + name_count = {k: v for k, v in Counter(names).items() if v > 1} + + named_elements = [] + for name, step in zip(names, elements): + count = name_count.get(name, 0) + if count: + name_count[name] = count - 1 + name = f"{name}_{count}" + + named_elements.append((name, step)) + + named_elements.reverse() + + return named_elements diff --git a/tests/cmp/test_ranks_cmp.py b/tests/cmp/test_ranks_cmp.py new file mode 100644 index 0000000..b177630 --- /dev/null +++ b/tests/cmp/test_ranks_cmp.py @@ -0,0 +1,550 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised)) +# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia +# Copyright (c) 2022, QuatroPe +# All rights reserved. + +# ============================================================================= +# DOCS +# ============================================================================= + +"""test for skcriteria.cmp.ranks_cmp + +""" + + +# ============================================================================= +# IMPORTS +# ============================================================================= + +from matplotlib.testing.decorators import check_figures_equal + +import numpy as np + +import pandas as pd + +import pytest + +import seaborn as sns + +from skcriteria import madm +from skcriteria.cmp import ranks_cmp + +# ============================================================================= +# TESTS +# ============================================================================= + + +def test_Ranks_only_one_rank(): + rank = madm.RankResult("test", ["a"], [1], {}) + with pytest.raises(ValueError): + ranks_cmp.mkrank_cmp(rank) + + +def test_Ranks_name_not_str(): + rank = madm.RankResult("test", ["a"], 
[1], {}) + with pytest.raises(ValueError): + ranks_cmp.RanksComparator([("a", rank), (1, rank)]) + + +def test_Ranks_not_rank_result(): + rank = madm.RankResult("test", ["a"], [1], {}) + with pytest.raises(TypeError): + ranks_cmp.RanksComparator([("a", rank), ("b", None)]) + + +def test_Ranks_duplicated_names(): + rank = madm.RankResult("test", ["a"], [1], {}) + with pytest.raises(ValueError): + ranks_cmp.RanksComparator([("a", rank), ("a", rank)]) + + +def test_RanksComparator_missing_alternatives(): + rank0 = madm.RankResult("test", ["a"], [1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 2], {}) + with pytest.raises(ValueError): + ranks_cmp.mkrank_cmp(rank0, rank1) + + +def test_RanksComparator_repr(): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + assert repr(rcmp) == "" + + +@pytest.mark.parametrize("untied", [True, False]) +def test_RanksComparator_to_dataframe(untied): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + df = ranks_cmp.mkrank_cmp(rank0, rank1).to_dataframe(untied=untied) + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + pd.testing.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("untied", [True, False]) +def test_RanksComparator_cov(untied): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + cov = ranks_cmp.mkrank_cmp(rank0, rank1).cov(untied=untied) + + expected = pd.DataFrame.from_dict( + { + "test_1": {"test_1": 0.5, "test_2": 0.5} + if untied + else {"test_1": 0.0, "test_2": 0.0}, + "test_2": {"test_1": 0.5, "test_2": 0.5} + if untied + else {"test_1": 0.0, "test_2": 0.0}, + }, + ) + + 
expected.columns.name = "Method" + expected.index.name = "Method" + + pd.testing.assert_frame_equal(cov, expected) + + +@pytest.mark.parametrize("untied", [True, False]) +def test_RanksComparator_corr(untied): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + corr = ranks_cmp.mkrank_cmp(rank0, rank1).corr(untied=untied) + + expected = pd.DataFrame.from_dict( + { + "test_1": {"test_1": 1.0, "test_2": 1.0} + if untied + else {"test_1": np.nan, "test_2": np.nan}, + "test_2": {"test_1": 1.0, "test_2": 1.0} + if untied + else {"test_1": np.nan, "test_2": np.nan}, + }, + ) + + expected.columns.name = "Method" + expected.index.name = "Method" + + pd.testing.assert_frame_equal(corr, expected) + + +@pytest.mark.parametrize("untied", [True, False]) +def test_RanksComparator_r2_score(untied): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + r2 = ranks_cmp.mkrank_cmp(rank0, rank1).r2_score(untied=untied) + + expected = pd.DataFrame.from_dict( + { + "test_1": {"test_1": 1.0, "test_2": 1.0}, + "test_2": {"test_1": 1.0, "test_2": 1.0}, + }, + ) + + expected.columns.name = "Method" + expected.index.name = "Method" + + pd.testing.assert_frame_equal(r2, expected) + + +@pytest.mark.parametrize("untied", [True, False]) +def test_RanksComparator_distance(untied): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + dis = ranks_cmp.mkrank_cmp(rank0, rank1).distance(untied=untied) + + expected = pd.DataFrame.from_dict( + { + "test_1": {"test_1": 0.0, "test_2": 0.0}, + "test_2": {"test_1": 0.0, "test_2": 0.0}, + }, + ) + + expected.columns.name = "Method" + expected.index.name = "Method" + + pd.testing.assert_frame_equal(dis, expected) + + +def test_RanksComparator_len(): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + 
assert len(ranks_cmp.mkrank_cmp(rank0, rank1)) == 2 + + +def test_RanksComparator_getitem(): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + copy = rcmp[0:] + + assert rank0 == rcmp["test_1"] == rcmp[0] == copy[0] + assert rank1 == rcmp["test_2"] == rcmp[1] == copy[1] + + with pytest.raises(ValueError): + rcmp[0::2] + + with pytest.raises(KeyError): + rcmp[object] + + +def test_RanksComparator_hash(): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + assert id(rcmp) == hash(rcmp) + + +def test_RanksComparator_plot(): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + assert isinstance(rcmp.plot, ranks_cmp.RanksComparatorPlotter) + assert rcmp.plot._ranks_cmp is rcmp + + +# ============================================================================= +# RanksComparatorPlotter +# ============================================================================= + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_flow(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.flow(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + sns.lineplot(data=expected.T, estimator=None, sort=False, ax=exp_ax) + exp_ax.grid(alpha=0.3) + + 
exp_ax.set_ylabel(ranks_cmp.RANKS_LABELS[untied]) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_reg(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.reg(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + sns.regplot( + x="test_1", + y="test_2", + data=expected, + label="x=test_1, y=test_2 - $R^2=1$", + ax=exp_ax, + ) + + ranks_label = ranks_cmp.RANKS_LABELS[untied] + exp_ax.set(xlabel=f"'x' {ranks_label}", ylabel=f"'y' {ranks_label}") + + exp_ax.legend() + + +@pytest.mark.parametrize("untied", [True, False]) +def test_RanksComparatorPlotter_reg_unexpected_keyword_argument_color(untied): + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + with pytest.raises(TypeError): + rcmp.plot.reg(color="k", untied=untied) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_heatmap(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.heatmap(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + 
expected.index.name = "Alternatives" + + sns.heatmap( + expected, + annot=True, + cbar_kws={"label": ranks_cmp.RANKS_LABELS[untied]}, + ax=exp_ax, + ) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_corr(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.corr(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + sns.heatmap( + expected.corr(), + annot=True, + cbar_kws={"label": "Correlation"}, + ax=exp_ax, + ) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_cov(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.cov(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + sns.heatmap( + expected.cov(), + annot=True, + cbar_kws={"label": "Covariance"}, + ax=exp_ax, + ) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_r2_score(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], 
[1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.r2_score(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"test_1": 1.0, "test_2": 1.0}, + "test_2": {"test_1": 1.0, "test_2": 1.0}, + }, + ) + expected.columns.name = "Method" + expected.index.name = "Method" + + sns.heatmap( + expected, + annot=True, + cbar_kws={"label": "$R^2$"}, + ax=exp_ax, + ) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_distance(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.distance(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"test_1": 0, "test_2": 0}, + "test_2": {"test_1": 0, "test_2": 0}, + }, + ) + expected.columns.name = "Method" + expected.index.name = "Method" + + sns.heatmap( + expected, + annot=True, + cbar_kws={"label": "Hamming distance"}, + ax=exp_ax, + ) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@pytest.mark.parametrize("orient", ["v", "h"]) +@check_figures_equal() +def test_RanksComparatorPlotter_box(fig_test, fig_ref, untied, orient): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.box(ax=test_ax, orient=orient, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + sns.boxplot(data=expected.T, orient=orient) + + 
ranks_label = ranks_cmp.RANKS_LABELS[untied] + if orient in (None, "v"): + exp_ax.set_ylabel(ranks_label) + else: + exp_ax.set_xlabel(ranks_label) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_bar(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.bar(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + expected.plot.bar(ax=exp_ax) + + exp_ax.set_ylabel(ranks_cmp.RANKS_LABELS[untied]) + + +@pytest.mark.slow +@pytest.mark.parametrize("untied", [True, False]) +@check_figures_equal() +def test_RanksComparatorPlotter_barh(fig_test, fig_ref, untied): + test_ax = fig_test.subplots() + + rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {}) + rcmp = ranks_cmp.mkrank_cmp(rank0, rank1) + + rcmp.plot.barh(ax=test_ax, untied=untied) + + # EXPECTED + exp_ax = fig_ref.subplots() + + expected = pd.DataFrame.from_dict( + { + "test_1": {"a": 1, "b": 2 if untied else 1}, + "test_2": {"a": 1, "b": 2 if untied else 1}, + } + ) + expected.columns.name = "Method" + expected.index.name = "Alternatives" + + expected.plot.barh(ax=exp_ax) + + exp_ax.set_xlabel(ranks_cmp.RANKS_LABELS[untied]) diff --git a/tests/conftest.py b/tests/conftest.py index 2dfca6c..438f0f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,12 +45,14 @@ @pytest.fixture(scope="session") def data_values(): def make( + *, seed=None, min_alternatives=3, max_alternatives=10, min_criteria=3, max_criteria=10, min_objectives_proportion=0.5, + 
nan_proportion=0, ): # start the random generator @@ -71,9 +73,15 @@ def make( # create the data matrix with rows = alt and columns = crit mtx = random.random((alternatives_number, criteria_number)) - # determine the number of minimize objectives bases on the proportion - # of the total number of criteria, and the maximize is the complement + # if we have a nan ratio >0 of nan we have to add them randomly + # in the matrix + if nan_proportion: + nan_number = round(mtx.size * float(nan_proportion)) + nan_positions = random.choice(mtx.size, nan_number, replace=False) + mtx.ravel()[nan_positions] = np.nan + # determine the number of minimize objectives based on the proportion + # of the total number of criteria, and the maximize is the complement min_objectives_number = round( criteria_number * min_objectives_proportion ) @@ -118,9 +126,9 @@ def make( @pytest.fixture(scope="session") def decision_matrix(data_values): @functools.wraps(data_values) - def make(*args, **kwargs): + def make(**kwargs): mtx, objectives, weights, alternatives, criteria = data_values( - *args, **kwargs + **kwargs ) dm = core.mkdm( diff --git a/tests/core/test_data.py b/tests/core/test_data.py index 733f630..fe508b3 100644 --- a/tests/core/test_data.py +++ b/tests/core/test_data.py @@ -24,7 +24,7 @@ import pandas as pd -from pyquery import PyQuery +import pyquery import pytest @@ -37,37 +37,11 @@ def construct_iobjectives(arr): - return [data.Objective.construct_from_alias(obj).value for obj in arr] + return [data.Objective.from_alias(obj).value for obj in arr] def construct_objectives(arr): - return [data.Objective.construct_from_alias(obj) for obj in arr] - - -# ============================================================================= -# ENUM -# ============================================================================= - - -def test_objective_construct(): - for alias in data.Objective._MAX_ALIASES.value: - objective = data.Objective.construct_from_alias(alias) - assert objective is 
data.Objective.MAX - for alias in data.Objective._MIN_ALIASES.value: - objective = data.Objective.construct_from_alias(alias) - assert objective is data.Objective.MIN - with pytest.raises(ValueError): - data.Objective.construct_from_alias("no anda") - - -def test_objective_str(): - assert str(data.Objective.MAX) == data.Objective.MAX.name - assert str(data.Objective.MIN) == data.Objective.MIN.name - - -def test_objective_to_string(): - assert data.Objective.MAX.to_string() == data.Objective._MAX_STR.value - assert data.Objective.MIN.to_string() == data.Objective._MIN_STR.value + return [data.Objective.from_alias(obj) for obj in arr] # ============================================================================= @@ -519,12 +493,80 @@ def test_DecisionMatrix_self_ne(data_values): assert not dm.equals(other) +# ============================================================================= +# SLICES +# ============================================================================= + + +def test_DecisionMatrix__getitem__(): + dm = data.mkdm( + matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + objectives=[min, max, min], + weights=[0.1, 0.2, 0.3], + alternatives="A B C".split(), + criteria="X Y Z".split(), + ) + assert dm["X"].equals(dm[["X"]]) + + expected = data.mkdm( + matrix=[[1, 3], [4, 6], [7, 9]], + objectives=[min, min], + weights=[0.1, 0.3], + alternatives="A B C".split(), + criteria="X Z".split(), + ) + assert dm[["X", "Z"]].equals(expected) + + +def test_DecisionMatrix_loc(): + dm = data.mkdm( + matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + objectives=[min, max, min], + weights=[0.1, 0.2, 0.3], + alternatives="A B C".split(), + criteria="X Y Z".split(), + ) + assert dm.loc.name == "loc" + assert dm.loc["A"].equals(dm.loc[["A"]]) + + expected = data.mkdm( + matrix=[[1, 2, 3], [7, 8, 9]], + objectives=[min, max, min], + weights=[0.1, 0.2, 0.3], + alternatives="A C".split(), + criteria="X Y Z".split(), + ) + assert dm.loc[["A", "C"]].equals(expected) + + +def 
test_DecisionMatrix_iloc(): + dm = data.mkdm( + matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + objectives=[min, max, min], + weights=[0.1, 0.2, 0.3], + alternatives="A B C".split(), + criteria="X Y Z".split(), + ) + assert dm.iloc.name == "iloc" + assert dm.iloc[2].equals(dm.iloc[[2]]) + + expected = data.mkdm( + matrix=[[1, 2, 3], [7, 8, 9]], + objectives=[min, max, min], + weights=[0.1, 0.2, 0.3], + alternatives="A C".split(), + criteria="X Y Z".split(), + ) + + assert dm.iloc[[0, 2]].equals(expected) + + # ============================================================================= # REPR # ============================================================================= -def test_mksm_simple_repr(): +def test_mkdm_simple_repr(): dm = data.mkdm( matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], @@ -533,10 +575,10 @@ def test_mksm_simple_repr(): ) expected = ( - " C0[\u25bc 0.1] C1[\u25b2 0.2] C2[\u25bc 0.3]\n" - "A0 1 2 3\n" - "A1 4 5 6\n" - "A2 7 8 9\n" + " C0[▼ 0.1] C1[▲ 0.2] C2[▼ 0.3]\n" + "A0 1 2 3\n" + "A1 4 5 6\n" + "A2 7 8 9\n" "[3 Alternatives x 3 Criteria]" ) @@ -544,18 +586,18 @@ def test_mksm_simple_repr(): assert result == expected -def test_simple_html(): +def test_mkdm_simple_html(): dm = data.mkdm( matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], objectives=[min, max, min], weights=[0.1, 0.2, 0.3], ) - expected = PyQuery( + expected = pyquery.PyQuery( """
-