`_\ ).
-* New documentation. The quick start is totally rewritten!
-
-**Full Changelog**\ : https://github.com/quatrope/scikit-criteria/commits/0.5
-
-Version 0.2
------------
-
-First OO stable version.
-
-Version 0.1
------------
-
-Only functions.
diff --git a/docs/source/_static/css/skcriteria.css b/docs/source/_static/css/skcriteria.css
index fc056f1..614ab8d 100644
--- a/docs/source/_static/css/skcriteria.css
+++ b/docs/source/_static/css/skcriteria.css
@@ -9,6 +9,8 @@
--links: #7b8e2d;
/* Background color of jupyter input cell */
--jupyter-in-cell-bg: #e3ffd28c;
+ /* Deprecation color */
+ --deprecated: #ff7474;
}
@@ -40,6 +42,17 @@ a:hover code {
color: var(--primary) !important;
}
+span.deprecated {
+ color: var(--deprecated) !important;
+}
+
+
+/* remove the title of the first page */
+
+section#scikit-criteria-documentation>h1:nth-child(1) {
+ display: none;
+}
+
/* NAVBAR */
diff --git a/docs/source/api/cmp/index.rst b/docs/source/api/cmp/index.rst
new file mode 100644
index 0000000..8d73ab4
--- /dev/null
+++ b/docs/source/api/cmp/index.rst
@@ -0,0 +1,14 @@
+``skcriteria.cmp`` package
+============================
+
+.. automodule:: skcriteria.cmp
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :ignore-module-all:
+
+.. toctree::
+ :maxdepth: 2
+ :glob:
+
+ *
\ No newline at end of file
diff --git a/docs/source/api/cmp/ranks_cmp.rst b/docs/source/api/cmp/ranks_cmp.rst
new file mode 100644
index 0000000..104bf98
--- /dev/null
+++ b/docs/source/api/cmp/ranks_cmp.rst
@@ -0,0 +1,7 @@
+``skcriteria.cmp.ranks_cmp`` module
+===================================
+
+.. automodule:: skcriteria.cmp.ranks_cmp
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/api/core/objectives.rst b/docs/source/api/core/objectives.rst
new file mode 100644
index 0000000..a29bfa2
--- /dev/null
+++ b/docs/source/api/core/objectives.rst
@@ -0,0 +1,7 @@
+``skcriteria.core.objectives`` module
+=====================================
+
+.. automodule:: skcriteria.core.objectives
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/api/datasets/index.rst b/docs/source/api/datasets/index.rst
new file mode 100644
index 0000000..618393a
--- /dev/null
+++ b/docs/source/api/datasets/index.rst
@@ -0,0 +1,14 @@
+``skcriteria.datasets`` package
+===============================
+
+.. automodule:: skcriteria.datasets
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :ignore-module-all:
+
+.. .. toctree::
+.. :maxdepth: 2
+.. :glob:
+
+.. *
\ No newline at end of file
diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
index 06a96c4..f5a7fff 100644
--- a/docs/source/api/index.rst
+++ b/docs/source/api/index.rst
@@ -26,6 +26,18 @@
preprocessing/index
+.. toctree::
+ :maxdepth: 2
+
+ cmp/index
+
+
+.. toctree::
+ :maxdepth: 2
+
+ datasets/index
+
+
.. toctree::
:maxdepth: 2
diff --git a/docs/source/api/madm/_base.rst b/docs/source/api/madm/_base.rst
deleted file mode 100644
index 93cd101..0000000
--- a/docs/source/api/madm/_base.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-``skcriteria.madm._base`` module
-==================================
-
-.. automodule:: skcriteria.madm._base
- :members:
- :undoc-members:
- :show-inheritance:
\ No newline at end of file
diff --git a/docs/source/api/madm/_madm_base.rst b/docs/source/api/madm/_madm_base.rst
new file mode 100644
index 0000000..eb59834
--- /dev/null
+++ b/docs/source/api/madm/_madm_base.rst
@@ -0,0 +1,7 @@
+``skcriteria.madm._madm_base`` module
+=====================================
+
+.. automodule:: skcriteria.madm._madm_base
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/source/api/preprocessing/_preprocessing_base.rst b/docs/source/api/preprocessing/_preprocessing_base.rst
new file mode 100644
index 0000000..2088654
--- /dev/null
+++ b/docs/source/api/preprocessing/_preprocessing_base.rst
@@ -0,0 +1,7 @@
+``skcriteria.preprocessing._preprocessing_base`` module
+=======================================================
+
+.. automodule:: skcriteria.preprocessing._preprocessing_base
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/docs/source/api/preprocessing/distance.rst b/docs/source/api/preprocessing/distance.rst
index a359697..a3cbedf 100644
--- a/docs/source/api/preprocessing/distance.rst
+++ b/docs/source/api/preprocessing/distance.rst
@@ -1,6 +1,9 @@
``skcriteria.preprocessing.distance`` module
============================================
+.. warning::
+ This module is deprecated.
+
.. automodule:: skcriteria.preprocessing.distance
:members:
:undoc-members:
diff --git a/docs/source/api/preprocessing/impute.rst b/docs/source/api/preprocessing/impute.rst
new file mode 100644
index 0000000..4ce3db7
--- /dev/null
+++ b/docs/source/api/preprocessing/impute.rst
@@ -0,0 +1,7 @@
+``skcriteria.preprocessing.impute`` module
+===========================================
+
+.. automodule:: skcriteria.preprocessing.impute
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/api/utils/decorators.rst b/docs/source/api/utils/cmanagers.rst
similarity index 53%
rename from docs/source/api/utils/decorators.rst
rename to docs/source/api/utils/cmanagers.rst
index 390ed5c..10a605e 100644
--- a/docs/source/api/utils/decorators.rst
+++ b/docs/source/api/utils/cmanagers.rst
@@ -1,7 +1,7 @@
-``skcriteria.utils.decorators`` module
+``skcriteria.utils.cmanagers`` module
======================================
-.. automodule:: skcriteria.utils.decorators
+.. automodule:: skcriteria.utils.cmanagers
:members:
:undoc-members:
:show-inheritance:
diff --git a/docs/source/api/utils/deprecate.rst b/docs/source/api/utils/deprecate.rst
new file mode 100644
index 0000000..b59cc6f
--- /dev/null
+++ b/docs/source/api/utils/deprecate.rst
@@ -0,0 +1,7 @@
+``skcriteria.utils.deprecate`` module
+======================================
+
+.. automodule:: skcriteria.utils.deprecate
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/api/utils/doctools.rst b/docs/source/api/utils/doctools.rst
new file mode 100644
index 0000000..2936bb9
--- /dev/null
+++ b/docs/source/api/utils/doctools.rst
@@ -0,0 +1,7 @@
+``skcriteria.utils.doctools`` module
+======================================
+
+.. automodule:: skcriteria.utils.doctools
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/api/utils/unames.rst b/docs/source/api/utils/unames.rst
new file mode 100644
index 0000000..312f0c2
--- /dev/null
+++ b/docs/source/api/utils/unames.rst
@@ -0,0 +1,7 @@
+``skcriteria.utils.unames`` module
+=====================================
+
+.. automodule:: skcriteria.utils.unames
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 55889c6..fcc0fc6 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -79,7 +79,7 @@
numpydoc_class_members_toctree = False
-nbsphinx_execute = "always"
+nbsphinx_execute = "never"
nbsphinx_allow_errors = True
@@ -97,7 +97,9 @@
# General information about the project.
project = skcriteria.NAME
-copyright = "2016-2022, Juan B. Cabral - Nadia A. Luczywo"
+copyright = "2016-2021, Juan B. Cabral - Nadia A. Luczywo - Copyright (c) 2022, QuatroPe"
+
+
author = "Juan BC"
# The version info for the project you're documenting, acts as replacement for
@@ -114,7 +116,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
-language = None
+language = "en"
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -237,10 +239,10 @@
# =============================================================================
-# INJECT REAMDE INTO THE RESTRUCTURED TEXT
+# INJECT README INTO THE RESTRUCTURED TEXT
# =============================================================================
-import m2r
+import m2r2
DYNAMIC_RST = {
"README.md": "README.rst",
@@ -256,7 +258,7 @@
with open(rst_path, "w") as fp:
fp.write(".. FILE AUTO GENERATED !! \n")
- fp.write(m2r.convert(readme_md))
+ fp.write(m2r2.convert(readme_md))
print(f"{md_path} -> {rst_path} regenerated!")
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 78520c2..0a180e6 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -3,6 +3,9 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
+Scikit-Criteria Documentation
+=============================
+
.. image:: _static/logo_medium.png
:align: center
:scale: 100 %
diff --git a/docs/source/refs.bib b/docs/source/refs.bib
index f0349de..ec27beb 100644
--- a/docs/source/refs.bib
+++ b/docs/source/refs.bib
@@ -161,4 +161,65 @@ @article{simon1955behavioral
pages = {99--118},
year = {1955},
publisher = {MIT Press}
+}
+
+% dominance
+
+@misc{enwiki:1107297090,
+ author = {{Wikipedia contributors}},
+ title = {Pareto front --- {Wikipedia}{,} The Free Encyclopedia},
+ year = {2022},
+ howpublished = {\url{https://en.wikipedia.org/w/index.php?title=Pareto_front&oldid=1107297090}},
+ note = {[Online; accessed 9-October-2022]}
+}
+
+@misc{enwiki:1110412520,
+ author = {{Wikipedia contributors}},
+ title = {Pareto efficiency --- {Wikipedia}{,} The Free Encyclopedia},
+ year = {2022},
+ howpublished = {\url{https://en.wikipedia.org/w/index.php?title=Pareto_efficiency&oldid=1110412520}},
+ note = {[Online; accessed 9-October-2022]}
+}
+
+% UTILS
+
+@misc{enwiki:1114075000,
+ author = {{Wikipedia contributors}},
+ title = {Singleton pattern --- {Wikipedia}{,} The Free Encyclopedia},
+ year = {2022},
+ url = {https://en.wikipedia.org/w/index.php?title=Singleton_pattern&oldid=1114075000},
+ note = {[Online; accessed 12-October-2022]}
+}
+
+@book{gamma1995design,
+ title = {Design patterns: elements of reusable object-oriented software},
+ author = {Gamma, Erich and Helm, Richard and Johnson, Ralph and Johnson, Ralph E and Vlissides, John and others},
+ year = {1995},
+ publisher = {Pearson Deutschland GmbH}
+}
+
+
+% DATASETS
+
+@article{van2021evaluation,
+ title = {Evaluation of the importance of criteria for the selection of cryptocurrencies},
+ author = {Van Heerden, Natalia A and Cabral, Juan B and Luczywo, Nadia},
+ journal = {arXiv preprint arXiv:2109.00130},
+ year = {2021}
+}
+
+@inproceedings{van2021epio_evaluation,
+ title = {Evaluaci{\'o}n de la importancia de criterios para la selecci{\'o}n de criptomonedas},
+ author = {Van Heerden, Natalia A and Cabral, Juan B and Luczywo, Nadia},
+ booktitle = {XXXIV ENDIO - XXXII EPIO Virtual 2021.},
+ year = {2021}
+}
+
+@misc{rajkumar_2021,
+ title = {Cryptocurrency historical prices},
+ url = {https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory},
+ journal = {Kaggle},
+ author = {Rajkumar, Sudalai},
+ year = {2021},
+ month = {Jul}
}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 3962b27..fa19155 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,3 +10,67 @@ markers = [
testpaths = [
"tests",
]
+
+# =============================================================================
+# PACKAGING
+# =============================================================================
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "scikit-criteria"
+version = "0.8.rc1"
+authors = [{name = "Juan B Cabral & QuatroPe", email = "jbcabral@unc.edu.ar"}]
+readme = "README.md"
+license = {file = "LICENSE.txt"}
+description = "Scikit-Criteria is a collection of algorithms, methods and techniques for multiple-criteria decision analysis."
+keywords = [
+ "muticriteria",
+ "mcda",
+ "mcdm",
+ "weightedsum",
+ "weightedproduct",
+ "simus",
+ "topsis",
+ "moora",
+ "electre",
+ "critic",
+ "entropy",
+ "dominance",
+]
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Education",
+ "Intended Audience :: Science/Research",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Topic :: Scientific/Engineering",
+]
+urls = { Homepage = "https://scikit-criteria.quatrope.org/", Repository = "https://github.com/quatrope/scikit-criteria" }
+dependencies = [
+ "numpy",
+ "pandas",
+ "scipy",
+ "jinja2",
+ "custom_inherit",
+ "seaborn",
+ "pulp",
+ "Deprecated",
+ "scikit-learn",
+ "matplotlib!=3.6.1",
+]
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.packages.find]
+include = ["skcriteria", "skcriteria.*"]
+namespaces = false
diff --git a/requirements_dev.txt b/requirements_dev.txt
new file mode 100644
index 0000000..c1f9f63
--- /dev/null
+++ b/requirements_dev.txt
@@ -0,0 +1,18 @@
+tox
+ipdb
+pytest
+pytest-ordering
+pyquery
+
+flake8
+flake8-import-order
+flake8-black
+flake8-builtins
+
+coverage
+pytest-cov
+
+pydocstyle
+toml
+
+https://github.com/quatrope/qafan/archive/refs/heads/master.zip
\ No newline at end of file
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 7aefd3d..0000000
--- a/setup.py
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
-# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
-# Copyright (c) 2022, QuatroPe
-# All rights reserved.
-
-# =============================================================================
-# DOCS
-# =============================================================================
-
-"""This file is for distribute scikit-criteria
-
-"""
-
-
-# =============================================================================
-# IMPORTS
-# =============================================================================
-
-
-import os
-import pathlib
-
-from setuptools import find_packages, setup
-
-os.environ["__SKCRITERIA_IN_SETUP__"] = "True"
-import skcriteria # noqa
-
-# =============================================================================
-# CONSTANTS
-# =============================================================================
-
-REQUIREMENTS = [
- "numpy",
- "pandas",
- "pyquery",
- "scipy",
- "jinja2",
- "custom_inherit",
- "seaborn",
- "pulp",
- "Deprecated",
-]
-
-PATH = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
-
-with open(PATH / "README.md") as fp:
- LONG_DESCRIPTION = fp.read()
-
-
-# =============================================================================
-# FUNCTIONS
-# =============================================================================
-
-
-def do_setup():
- setup(
- name="scikit-criteria",
- version=skcriteria.VERSION,
- description=skcriteria.DOC,
- long_description=LONG_DESCRIPTION,
- long_description_content_type="text/markdown",
- author="QuatroPe",
- author_email="jbcabral@unc.edu.ar",
- url="http://scikit-criteria.org/",
- license="3 Clause BSD",
- keywords=[
- "muticriteria",
- "mcda",
- "mcdm",
- "weightedsum",
- "weightedproduct",
- "simus",
- "topsis",
- "moora",
- "electre",
- "critic",
- "entropy",
- "dominance",
- ],
- classifiers=[
- "Development Status :: 4 - Beta",
- "Intended Audience :: Education",
- "Intended Audience :: Science/Research",
- "License :: OSI Approved :: BSD License",
- "Operating System :: OS Independent",
- "Programming Language :: Python",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: Implementation :: CPython",
- "Topic :: Scientific/Engineering",
- ],
- packages=[
- pkg for pkg in find_packages() if pkg.startswith("skcriteria")
- ],
- install_requires=REQUIREMENTS,
- )
-
-
-if __name__ == "__main__":
- do_setup()
diff --git a/skcriteria/__init__.py b/skcriteria/__init__.py
index 6d04dae..e6350b9 100644
--- a/skcriteria/__init__.py
+++ b/skcriteria/__init__.py
@@ -16,24 +16,26 @@
# IMPORTS
# =============================================================================
-import os
+import importlib.metadata
-if os.getenv("__SKCRITERIA_IN_SETUP__") != "True":
- from .core import DecisionMatrix, Objective, mkdm
-
-del os
+from . import datasets
+from .core import DecisionMatrix, Objective, mkdm
# =============================================================================
# CONSTANTS
# =============================================================================
-__all__ = ["mkdm", "DecisionMatrix", "Objective"]
+__all__ = ["mkdm", "DecisionMatrix", "Objective", "datasets"]
-__version__ = ("0", "7")
NAME = "scikit-criteria"
DOC = __doc__
-VERSION = ".".join(__version__)
+VERSION = importlib.metadata.version(NAME)
+
+__version__ = tuple(VERSION.split("."))
+
+
+del importlib
diff --git a/skcriteria/cmp/__init__.py b/skcriteria/cmp/__init__.py
new file mode 100644
index 0000000..5996aef
--- /dev/null
+++ b/skcriteria/cmp/__init__.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Utilities for a-posteriori analysis of experiments."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+from .ranks_cmp import RanksComparator, mkrank_cmp
+
+# =============================================================================
+# ALL
+# =============================================================================
+
+__all__ = [
+ "RanksComparator",
+ "mkrank_cmp",
+]
diff --git a/skcriteria/cmp/ranks_cmp.py b/skcriteria/cmp/ranks_cmp.py
new file mode 100644
index 0000000..fa52e2d
--- /dev/null
+++ b/skcriteria/cmp/ranks_cmp.py
@@ -0,0 +1,780 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Ranking comparison routines."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+import functools
+import itertools as it
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+
+import pandas as pd
+
+from scipy.spatial import distance
+
+import seaborn as sns
+
+from sklearn import metrics as _skl_metrics
+
+from ..core import SKCMethodABC
+from ..madm import RankResult
+from ..utils import AccessorABC, Bunch, unique_names
+
+
+# =============================================================================
+# CONSTANTS
+# =============================================================================
+
+RANKS_LABELS = {
+ True: "Untied ranks (lower is better)",
+ False: "Ranks (lower is better)",
+}
+
+
+# =============================================================================
+# COMPARATOR
+# =============================================================================
+
+
+class RanksComparator(SKCMethodABC):
+ """Rankings comparator object.
+
+ This class is intended to contain a collection of rankings on which you
+ want to do comparative analysis.
+
+ All rankings must have exactly the same alternatives, although their order
+ may vary.
+
+ All methods support the ``untied`` parameter, which serves to break ties
+ in rankings produced by methods that can assign more than one
+ alternative to the same position (e.g. ``ELECTRE2``).
+
+ Parameters
+ ----------
+ ranks : list
+ List of (name, ranking) tuples of ``skcriteria.madm.RankResult``
+ with the same alternatives.
+
+ See Also
+ --------
+ skcriteria.cmp.mkrank_cmp : Convenience function for simplified
+ ranks comparator construction.
+
+ """
+
+ _skcriteria_dm_type = "ranks_comparator"
+ _skcriteria_parameters = ["ranks"]
+
+ def __init__(self, ranks):
+ ranks = list(ranks)
+ self._validate_ranks(ranks)
+ self._ranks = ranks
+
+ # INTERNALS ===============================================================
+ def _validate_ranks(self, ranks):
+
+ if len(ranks) <= 1:
+ raise ValueError("Please provide more than one ranking")
+
+ used_names = set()
+ first_alternatives = set(ranks[0][1].alternatives)
+ for name, part in ranks:
+
+ if not isinstance(name, str):
+ raise ValueError("'name' must be instance of str")
+
+ if not isinstance(part, RankResult):
+ raise TypeError("ranks must be instance of madm.RankResult")
+
+ if name in used_names:
+ raise ValueError(f"Duplicated name {name!r}")
+ used_names.add(name)
+
+ diff = first_alternatives.symmetric_difference(part.alternatives)
+ if diff:
+ miss_str = ", ".join(diff)
+ raise ValueError(
+ f"Some ranks miss the alternative/s: {miss_str!r}"
+ )
+
+ # PROPERTIES ==============================================================
+ @property
+ def ranks(self):
+ """List of ranks in the comparator."""
+ return list(self._ranks)
+
+ @property
+ def named_ranks(self):
+ """Dictionary-like object, with the following attributes.
+
+ Read-only attribute to access any ranking by user-given name.
+ Keys are the rank names and values are the rankings themselves.
+
+ """
+ return Bunch("ranks", dict(self.ranks))
+
+ # MAGIC! ==================================================================
+
+ def __repr__(self):
+ """x.__repr__() <==> repr(x)."""
+ cls_name = type(self).__name__
+ ranks_names = [rn for rn, _ in self._ranks]
+ return f"<{cls_name} [ranks={ranks_names!r}]>"
+
+ def __len__(self):
+ """Return the number of rankings to compare."""
+ return len(self._ranks)
+
+ def __getitem__(self, ind):
+ """Return a sub-comparator or a single ranking in the pipeline.
+
+ Indexing with an integer will return an ranking; using a slice
+ returns another RankComparator instance which copies a slice of this
+ RankComparator. This copy is shallow: modifying ranks in the
+ sub-comparator will affect the larger pipeline and vice-versa.
+ However, replacing a value in `step` will not affect a copy.
+
+ """
+ if isinstance(ind, slice):
+ if ind.step not in (1, None):
+ cname = type(self).__name__
+ raise ValueError(f"{cname} slicing only supports a step of 1")
+ return self.__class__(self.ranks[ind])
+ elif isinstance(ind, int):
+ return self._ranks[ind][-1]
+ elif isinstance(ind, str):
+ return self.named_ranks[ind]
+ raise KeyError(ind)
+
+ def __hash__(self):
+ """x.__hash__() <==> hash(x)."""
+ return id(self)
+
+ # TO DATA =================================================================
+
+ def to_dataframe(self, *, untied=False):
+ """Convert the entire RanksComparator into a dataframe.
+
+ The alternatives are the rows, and the different rankings are the
+ columns.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+
+ Returns
+ -------
+ :py:class:`pd.DataFrame`
+ A RanksComparator as pandas DataFrame.
+
+ """
+ columns = {
+ rank_name: rank.to_series(untied=untied)
+ for rank_name, rank in self._ranks
+ }
+
+ df = pd.DataFrame.from_dict(columns)
+ df.columns.name = "Method"
+
+ return df
+
+ def corr(self, *, untied=False, **kwargs):
+ """Compute pairwise correlation of rankings, excluding NA/null values.
+
+ By default the Pearson correlation coefficient is used.
+
+ Please check the full documentation of the ``pandas.DataFrame.corr()``
+ method for details about the implementation.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the
+ ``pandas.DataFrame.corr()`` method.
+
+ Returns
+ -------
+ :py:class:`pd.DataFrame`
+ A DataFrame with the correlation between rankings.
+
+ """
+ return self.to_dataframe(untied=untied).corr(**kwargs)
+
+ def cov(self, *, untied=False, **kwargs):
+ """Compute pairwise covariance of rankings, excluding NA/null values.
+
+ Please check the full documentation of the ``pandas.DataFrame.cov()``
+ method for details about the implementation.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the
+ ``pandas.DataFrame.cov()`` method.
+
+ Returns
+ -------
+ :py:class:`pd.DataFrame`
+ A DataFrame with the covariance between rankings.
+
+ """
+ return self.to_dataframe(untied=untied).cov(**kwargs)
+
+ def r2_score(self, *, untied=False, **kwargs):
+ """Compute pairwise coefficient of determination regression score \
+ function of rankings, excluding NA/null values.
+
+ Best possible score is 1.0 and it can be negative (because the
+ model can be arbitrarily worse).
+
+ Please check the full documentation of the ``sklearn.metrics.r2_score``
+ function for details about the implementation and the behaviour.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the
+ ``sklearn.metrics.r2_score()`` function.
+
+ Returns
+ -------
+ :py:class:`pd.DataFrame`
+ A DataFrame with the coefficient of determination between rankings.
+
+ """
+ df = self.to_dataframe(untied=untied)
+ # here we are going to create a dict of dict
+ rows = defaultdict(dict)
+
+ # combine the methods pairwise
+ for r0, r1 in it.combinations(df.columns, 2):
+ r2_score = _skl_metrics.r2_score(df[r0], df[r1], **kwargs)
+
+ # add the metrics in both directions
+ rows[r0][r1] = r2_score
+ rows[r1][r0] = r2_score
+
+ # create the dataframe and replace NaN with 1 (perfect R2 with itself)
+ r2_df = pd.DataFrame.from_dict(rows).fillna(1)
+ r2_df = r2_df[df.columns].loc[df.columns]
+
+ r2_df.index.name = "Method"
+ r2_df.columns.name = "Method"
+
+ return r2_df
+
+ def distance(self, *, untied=False, metric="hamming", **kwargs):
+ """Compute pairwise distance between rankings.
+
+ By default the 'hamming' distance is used, which is simply the
+ proportion of disagreeing components between two rankings.
+
+ Please check the full documentation of the
+ ``scipy.spatial.distance.pdist`` function for details about the
+ implementation and the behaviour.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ metric: str or function, default ``"hamming"``
+ The distance metric to use. The distance function can
+ be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
+ 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
+ 'jaccard', 'jensenshannon', 'kulczynski1',
+ 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
+ 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath',
+ 'sqeuclidean', 'yule'.
+ kwargs:
+ Other keyword arguments are passed to the
+ ``scipy.spatial.distance.pdist()`` function.
+
+ Returns
+ -------
+ :py:class:`pd.DataFrame`
+ A DataFrame with the distance between rankings.
+
+ """
+ df = self.to_dataframe(untied=untied).T
+ dis_array = distance.pdist(df, metric=metric, **kwargs)
+ dis_mtx = distance.squareform(dis_array)
+ dis_df = pd.DataFrame(
+ dis_mtx, columns=df.index.copy(), index=df.index.copy()
+ )
+ return dis_df
+
+ # ACCESSORS (YES, WE USE CACHED PROPERTIES; IT'S THE EASIEST WAY) =========
+
+ @property
+ @functools.lru_cache(maxsize=None)
+ def plot(self):
+ """Plot accessor."""
+ return RanksComparatorPlotter(self)
+
+
+# =============================================================================
+# PLOTTER
+# =============================================================================
+
+
+class RanksComparatorPlotter(AccessorABC):
+ """RanksComparator plot utilities.
+
+ Kind of plot to produce:
+
+ - 'flow' : Changes in the rankings of the alternatives as flow lines
+ (default)
+ - 'reg' : Pairwise rankings data and a linear regression model fit plot.
+ - 'heatmap' : Rankings as a color-encoded matrix.
+ - 'corr' : Pairwise correlation of rankings as a color-encoded matrix.
+ - 'cov' : Pairwise covariance of rankings as a color-encoded matrix.
+ - 'r2_score' : Pairwise coefficient of determination regression score \
+ function of rankings as a color-encoded matrix.
+ - 'distance' : Pairwise distance between rankings as a color-encoded \
+ matrix.
+ - 'box' : Box-plot of rankings with respect to alternatives.
+ - 'bar' : Ranking of alternatives by method with vertical bars.
+ - 'barh' : Ranking of alternatives by method with horizontal bars.
+
+ """
+
+ _default_kind = "flow"
+
+ def __init__(self, ranks_cmp):
+ self._ranks_cmp = ranks_cmp
+
+ # MANUAL MADE PLOT ========================================================
+ # These plots have a much more manually orchestrated code.
+
+ def flow(self, *, untied=False, grid_kws=None, **kwargs):
+ """Represents changes in the rankings of the alternatives as lines \
+ flowing through the ranking-methods.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ grid_kws: dict or None
+ Dict with keyword arguments passed to
+ ``matplotlib.axes.Axes.grid()``.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.lineplot()``
+ function, except for ``data``, ``estimator`` and ``sort``.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ df = self._ranks_cmp.to_dataframe(untied=untied)
+
+ ax = sns.lineplot(data=df.T, estimator=None, sort=False, **kwargs)
+
+ grid_kws = {} if grid_kws is None else grid_kws
+ grid_kws.setdefault("alpha", 0.3)
+ ax.grid(**grid_kws)
+
+ ax.set_ylabel(RANKS_LABELS[untied])
+
+ return ax
+
+ def reg(
+ self,
+ *,
+ untied=False,
+ r2=True,
+ palette=None,
+ legend=True,
+ r2_fmt=".2g",
+ r2_kws=None,
+ **kwargs,
+ ):
+ """Plot a pairwise rankings data and a linear regression model fit.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ r2 : bool, default ``True``
+ If True, the coefficient of determination results are added to the
+ regression legend.
+ palette: matplotlib/seaborn color palette, default ``None``
+ Set of colors for mapping the hue variable.
+ legend: bool, default ``True``
+ If False, suppress the legend for semantic variables.
+ r2_fmt: str, default ``".2g"``
+ String formatting code to use when adding the coefficient of
+ determination.
+ r2_kws: dict or None
+ Dict with keyword arguments passed to the
+ ``sklearn.metrics.r2_score()`` function.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.regplot()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ df = self._ranks_cmp.to_dataframe(untied=untied)
+
+ # Just to ensure that no manual color reaches regplot
+ if "color" in kwargs:
+ cls_name = type(self).__name__
+ raise TypeError(
+ f"{cls_name}.reg() got an unexpected keyword argument 'color'"
+ )
+
+ # if there is a custom axis, we take it out
+ ax = kwargs.pop("ax", None)
+
+ # r2
+ if legend and r2:
+ r2_kws = {} if r2_kws is None else r2_kws
+ r2_df = self._ranks_cmp.r2_score(untied=untied, **r2_kws)
+
+ # we create the infinite cycle of colors for the palette,
+ # so we take out as we need
+ colors = it.cycle(sns.color_palette(palette=palette))
+
+ # pairwise ranks iteration
+ for x, y in it.combinations(df.columns, 2):
+ color = next(colors)
+
+ # The r2 correlation index
+ r2_label = ""
+ if legend and r2:
+ r2_score = format(r2_df[x][y], r2_fmt)
+ r2_label = f" - $R^2={r2_score}$"
+
+ label = "x={x}, y={y}{r2}".format(x=x, y=y, r2=r2_label)
+ ax = sns.regplot(
+ x=x, y=y, data=df, ax=ax, label=label, color=color, **kwargs
+ )
+
+ ranks_label = RANKS_LABELS[untied]
+ ax.set(xlabel=f"'x' {ranks_label}", ylabel=f"'y' {ranks_label}")
+
+ if legend:
+ ax.legend()
+
+ return ax
+
+ # SEABORN BASED ===========================================================
+ # Thin wrapper around seaborn plots
+
+ def heatmap(self, *, untied=False, **kwargs):
+ """Plot the rankings as a color-encoded matrix.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.heatmap()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ df = self._ranks_cmp.to_dataframe(untied=untied)
+ kwargs.setdefault("annot", True)
+ kwargs.setdefault("cbar_kws", {"label": RANKS_LABELS[untied]})
+ return sns.heatmap(data=df, **kwargs)
+
+ def corr(self, *, untied=False, corr_kws=None, **kwargs):
+ """Plot the pairwise correlation of rankings as a color-encoded matrix.
+
+ By default the Pearson correlation coefficient is used.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ corr_kws: dict or None
+ Dict with keyword arguments passed to the
+ ``pandas.DataFrame.corr()`` method.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.heatmap()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ corr_kws = {} if corr_kws is None else corr_kws
+ corr = self._ranks_cmp.corr(untied=untied, **corr_kws)
+
+ kwargs.setdefault("annot", True)
+ kwargs.setdefault("cbar_kws", {"label": "Correlation"})
+ return sns.heatmap(data=corr, **kwargs)
+
+ def cov(self, *, untied=False, cov_kws=None, **kwargs):
+ """Plot the pairwise covariance of rankings as a color-encoded matrix.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ cov_kws: dict or None
+ Dict with keyword arguments passed to the
+ ``pandas.DataFrame.cov()`` method.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.heatmap()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ cov_kws = {} if cov_kws is None else cov_kws
+ cov = self._ranks_cmp.cov(untied=untied, **cov_kws)
+
+ kwargs.setdefault("annot", True)
+ kwargs.setdefault("cbar_kws", {"label": "Covariance"})
+ return sns.heatmap(data=cov, **kwargs)
+
+ def r2_score(self, *, untied=False, r2_kws=None, **kwargs):
+ """Plot the pairwise coefficient of determination regression score \
+ function of rankings as a color-encoded matrix.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ r2_kws: dict or None
+ Dict with keyword arguments passed to the
+ ``sklearn.metrics.r2_score()`` function.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.heatmap()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ r2_kws = {} if r2_kws is None else r2_kws
+ r2 = self._ranks_cmp.r2_score(untied=untied, **r2_kws)
+
+ kwargs.setdefault("annot", True)
+ kwargs.setdefault("cbar_kws", {"label": "$R^2$"})
+ return sns.heatmap(data=r2, **kwargs)
+
+ def distance(
+ self, *, untied=False, metric="hamming", distance_kws=None, **kwargs
+ ):
+ """Plot the pairwise distance between rankings as a color-encoded \
+ matrix.
+
+ By default the 'hamming' distance is used, which is simply the
+ proportion of disagreeing components between two rankings.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ metric: str or function, default ``"hamming"``
+ The distance metric to use. The distance function can
+ be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
+ 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
+ 'jaccard', 'jensenshannon', 'kulczynski1',
+ 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
+ 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath',
+ 'sqeuclidean', 'yule'.
+ distance_kws: dict or None
+ Dict with keyword arguments passed to the
+ ``scipy.spatial.distance.pdist()`` function.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.heatmap()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ distance_kws = {} if distance_kws is None else distance_kws
+ dis = self._ranks_cmp.distance(
+ untied=untied, metric=metric, **distance_kws
+ )
+
+ kwargs.setdefault("annot", True)
+ kwargs.setdefault(
+ "cbar_kws", {"label": f"{metric} distance".capitalize()}
+ )
+ return sns.heatmap(data=dis, **kwargs)
+
+ def box(self, *, untied=False, **kwargs):
+ """Draw a boxplot to show rankings with respect to alternatives.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the ``seaborn.boxplot()``
+ function.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ df = self._ranks_cmp.to_dataframe(untied=untied)
+ ax = sns.boxplot(data=df.T, **kwargs)
+
+ ranks_label = RANKS_LABELS[untied]
+ if kwargs.get("orient") in (None, "v"):
+ ax.set_ylabel(ranks_label)
+ else:
+ ax.set_xlabel(ranks_label)
+
+ return ax
+
+ # DATAFRAME BASED ========================================================
+ # Thin wrapper around pandas.DataFrame.plot
+
+ def bar(self, *, untied=False, **kwargs):
+ """Draw plot that presents ranking of alternatives by method with \
+ vertical bars.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the
+ ``pandas.Dataframe.plot.bar()`` method.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ df = self._ranks_cmp.to_dataframe(untied=untied)
+ kwargs["ax"] = kwargs.get("ax") or plt.gca()
+ ax = df.plot.bar(**kwargs)
+ ax.set_ylabel(RANKS_LABELS[untied])
+ return ax
+
+ def barh(self, *, untied=False, **kwargs):
+ """Draw plot that presents ranking of alternatives by method with \
+ horizontal bars.
+
+ Parameters
+ ----------
+ untied: bool, default ``False``
+ If it is ``True`` and any ranking has ties, the
+ ``RankResult.untied_rank_`` property is used to assign each
+ alternative a single ranked order. On the other hand, if it is
+ ``False`` the rankings are used as they are.
+ kwargs:
+ Other keyword arguments are passed to the
+ ``pandas.Dataframe.plot.barh()`` method.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+ df = self._ranks_cmp.to_dataframe(untied=untied)
+ kwargs["ax"] = kwargs.get("ax") or plt.gca()
+ ax = df.plot.barh(**kwargs)
+ ax.set_xlabel(RANKS_LABELS[untied])
+ return ax
+
+
+# =============================================================================
+# FACTORY
+# =============================================================================
+
+
+def mkrank_cmp(*ranks):
+ """Construct a RankComparator from the given rankings.
+
+ This is a shorthand for the RankComparator constructor; it does not
+ require, and does not permit, naming the estimators. Instead, their names
+ will be set to the method attribute of the rankings automatically.
+
+ Parameters
+ ----------
+ *ranks: list of RankResult objects
+ List of scikit-criteria RankResult objects.
+
+ Returns
+ -------
+ rcmp : RanksComparator
+ Returns a scikit-criteria :class:`RanksComparator` object.
+
+ """
+ names = [r.method for r in ranks]
+ named_ranks = unique_names(names=names, elements=ranks)
+ return RanksComparator(named_ranks)
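
A minimal usage sketch for the new comparator. This example is not part of the diff; it assumes the existing ``WeightedSumModel`` and ``TOPSIS`` rankers and a toy, maximize-only decision matrix:

    import skcriteria as skc
    from skcriteria.cmp import mkrank_cmp
    from skcriteria.madm.simple import WeightedSumModel
    from skcriteria.madm.similarity import TOPSIS

    dm = skc.mkdm(
        matrix=[[5, 8, 4], [7, 3, 9], [6, 6, 6]],
        objectives=[max, max, max],
        alternatives=["A0", "A1", "A2"],
    )

    rank_ws = WeightedSumModel().evaluate(dm)  # one RankResult per method
    rank_tp = TOPSIS().evaluate(dm)

    rcmp = mkrank_cmp(rank_ws, rank_tp)  # names taken from each .method
    print(rcmp.to_dataframe())           # alternatives as rows, methods as columns
    print(rcmp.corr())                   # pairwise Pearson correlation
    rcmp.plot.flow()                     # default 'flow' kind, rankings as lines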
diff --git a/skcriteria/core/__init__.py b/skcriteria/core/__init__.py
index 0716b0c..fe520b2 100644
--- a/skcriteria/core/__init__.py
+++ b/skcriteria/core/__init__.py
@@ -15,16 +15,9 @@
# IMPORTS
# =============================================================================
-from .data import (
- DecisionMatrix,
- Objective,
- mkdm,
-)
-from .methods import (
- SKCMatrixAndWeightTransformerABC,
- SKCMethodABC,
- SKCTransformerABC,
-)
+from .data import DecisionMatrix, mkdm
+from .methods import SKCMethodABC
+from .objectives import Objective
from .plot import DecisionMatrixPlotter
# =============================================================================
@@ -36,7 +29,5 @@
"DecisionMatrix",
"DecisionMatrixPlotter",
"Objective",
- "SKCMatrixAndWeightTransformerABC",
"SKCMethodABC",
- "SKCTransformerABC",
]
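
Since ``Objective`` now lives in its own module and the alias constructor was renamed to ``from_alias`` (see the data.py hunk below), a quick sketch of the intended usage:

    from skcriteria.core.objectives import Objective

    assert Objective.from_alias("max") is Objective.MAX
    assert Objective.from_alias(">") is Objective.MAX
    assert Objective.from_alias(min) is Objective.MIN  # the builtin works too
    print(str(Objective.MIN))  # "MIN"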
diff --git a/skcriteria/core/data.py b/skcriteria/core/data.py
index adec9fd..cc4bc5e 100644
--- a/skcriteria/core/data.py
+++ b/skcriteria/core/data.py
@@ -12,7 +12,7 @@
"""Data abstraction layer.
This module defines the DecisionMatrix object, which internally encompasses
-the alternative matrix, weights and objectives (MIN, MAX) of the criteria.
+the alternative matrix, weights and objectives (MIN, MAX) of the criteria.
"""
@@ -21,7 +21,6 @@
# =============================================================================
-import enum
import functools
from collections import abc
@@ -30,95 +29,15 @@
import pandas as pd
from pandas.io.formats import format as pd_fmt
-import pyquery as pq
-
-
from .dominance import DecisionMatrixDominanceAccessor
+from .objectives import Objective
from .plot import DecisionMatrixPlotter
from .stats import DecisionMatrixStatsAccessor
-from ..utils import deprecated, doc_inherit
+from ..utils import deprecated, df_temporal_header, doc_inherit
# =============================================================================
-# CONSTANTS
-# =============================================================================
-class Objective(enum.Enum):
- """Representation of criteria objectives (Minimize, Maximize)."""
-
- #: Internal representation of minimize criteria
- MIN = -1
-
- #: Internal representation of maximize criteria
- MAX = 1
-
- # INTERNALS ===============================================================
-
- _MIN_STR = "\u25bc"
- _MAX_STR = "\u25b2"
-
- #: Another way to name the maximization criteria.
- _MAX_ALIASES = frozenset(
- [
- MAX,
- _MAX_STR,
- max,
- np.max,
- np.nanmax,
- np.amax,
- "max",
- "maximize",
- "+",
- ">",
- ]
- )
-
- #: Another ways to name the minimization criteria.
- _MIN_ALIASES = frozenset(
- [
- MIN,
- _MIN_STR,
- min,
- np.min,
- np.nanmin,
- np.amin,
- "min",
- "minimize",
- "<",
- "-",
- ]
- )
-
- # CUSTOM CONSTRUCTOR ======================================================
-
- @classmethod
- def construct_from_alias(cls, alias):
- """Return the alias internal representation of the objective."""
- if isinstance(alias, cls):
- return alias
- if isinstance(alias, str):
- alias = alias.lower()
- if alias in cls._MAX_ALIASES.value:
- return cls.MAX
- if alias in cls._MIN_ALIASES.value:
- return cls.MIN
- raise ValueError(f"Invalid criteria objective {alias}")
-
- # METHODS =================================================================
-
- def __str__(self):
- """Convert the objective to an string."""
- return self.name
-
- def to_string(self):
- """Return the printable representation of the objective."""
- if self.value in Objective._MIN_ALIASES.value:
- return Objective._MIN_STR.value
- if self.value in Objective._MAX_ALIASES.value:
- return Objective._MAX_STR.value
-
-
-# =============================================================================
-# _SLICER ARRAY
+# SLICERS ARRAY
# =============================================================================
class _ACArray(np.ndarray, abc.Mapping):
"""Immutable Array to provide access to the alternative and criteria \
@@ -163,9 +82,50 @@ def values(self):
return (self[e] for e in self)
+class _Loc:
+ """Locator abstraction.
+
+ This class ensures that the correct objectives and weights are applied
+ to the sliced ``DecisionMatrix``.
+
+ """
+
+ def __init__(self, name, real_loc, objectives, weights):
+ self._name = name
+ self._real_loc = real_loc
+ self._objectives = objectives
+ self._weights = weights
+
+ @property
+ def name(self):
+ """The name of the locator."""
+ return self._name
+
+ def __getitem__(self, slc):
+ """dm[slc] <==> dm.__getitem__(slc)."""
+ df = self._real_loc.__getitem__(slc)
+ if isinstance(df, pd.Series):
+ df = df.to_frame().T
+
+ dtypes = self._real_loc.obj.dtypes
+ dtypes = dtypes[dtypes.index.isin(df.columns)]
+
+ df = df.astype(dtypes)
+
+ objectives = self._objectives
+ objectives = objectives[objectives.index.isin(df.columns)].to_numpy()
+
+ weights = self._weights
+ weights = weights[weights.index.isin(df.columns)].to_numpy()
+
+ return DecisionMatrix(df, objectives, weights)
+
+
# =============================================================================
# DECISION MATRIX
# =============================================================================
+
+
class DecisionMatrix:
"""Representation of all data needed in the MCDA analysis.
@@ -242,9 +202,9 @@ class DecisionMatrix:
def __init__(self, data_df, objectives, weights):
self._data_df = (
- data_df.copy()
+ data_df.copy(deep=True)
if isinstance(data_df, pd.DataFrame)
- else pd.DataFrame(data_df)
+ else pd.DataFrame(data_df, copy=True)
)
self._objectives = np.asarray(objectives, dtype=object)
@@ -378,15 +338,25 @@ def from_mcda_data(
@property
def alternatives(self):
- """Names of the alternatives."""
- arr = self._data_df.index.to_numpy()
+ """Names of the alternatives.
+
+ From this array you can also access the values of the alternatives as
+ ``pandas.Series``.
+
+ """
+ arr = self._data_df.index.to_numpy(copy=True)
slicer = self._data_df.loc.__getitem__
return _ACArray(arr, slicer)
@property
def criteria(self):
- """Names of the criteria."""
- arr = self._data_df.columns.to_numpy()
+ """Names of the criteria.
+
+ From this array you can also access the values of the criteria as
+ ``pandas.Series``.
+
+ """
+ arr = self._data_df.columns.to_numpy(copy=True)
slicer = self._data_df.__getitem__
return _ACArray(arr, slicer)
@@ -396,17 +366,19 @@ def weights(self):
return pd.Series(
self._weights,
dtype=float,
- index=self._data_df.columns,
+ index=self._data_df.columns.copy(deep=True),
name="Weights",
+ copy=True,
)
@property
def objectives(self):
"""Objectives of the criteria as ``Objective`` instances."""
return pd.Series(
- [Objective.construct_from_alias(a) for a in self._objectives],
+ [Objective.from_alias(a) for a in self._objectives],
index=self._data_df.columns,
name="Objectives",
+ copy=True,
)
@property
@@ -436,7 +408,8 @@ def iobjectives(self):
return pd.Series(
[o.value for o in self.objectives],
dtype=np.int8,
- index=self._data_df.columns,
+ index=self._data_df.columns.copy(deep=True),
+ copy=True,
)
@property
@@ -445,16 +418,23 @@ def matrix(self):
The matrix excludes weights and objectives.
- If you want to create a DataFrame with objetvies and weights, use
+ If you want to create a DataFrame with objectives and weights, use
``DecisionMatrix.to_dataframe()``
"""
- return self._data_df.copy()
+ mtx = self._data_df.copy(deep=True)
+ mtx.index = self._data_df.index.copy(deep=True)
+ mtx.index.name = "Alternatives"
+ mtx.columns = self._data_df.columns.copy(deep=True)
+ mtx.columns.name = "Criteria"
+ return mtx
@property
def dtypes(self):
"""Dtypes of the criteria."""
- return self._data_df.dtypes.copy()
+ series = self._data_df.dtypes.copy(deep=True)
+ series.index = self._data_df.dtypes.index.copy(deep=True)
+ return series
# ACCESSORS (YES, WE USE CACHED PROPERTIES IS THE EASIEST WAY) ============
@@ -553,9 +533,9 @@ def to_dict(self):
@deprecated(
reason=(
- "Use 'DecisionMatrix.stats()', "
- "'DecisionMatrix.stats(\"describe\")' or "
- "'DecisionMatrix.stats.describe()' instead."
+ "Use ``DecisionMatrix.stats()``, "
+ "``DecisionMatrix.stats('describe)`` or "
+ "``DecisionMatrix.stats.describe()`` instead."
),
version=0.6,
)
@@ -600,7 +580,7 @@ def __len__(self):
def equals(self, other):
"""Return True if the decision matrix are equal.
- This method calls `DecisionMatrix.aquals` whitout tolerance.
+ This method calls `DecisionMatrix.aequals` without tolerance.
Parameters
----------
@@ -690,15 +670,80 @@ def aequals(self, other, rtol=1e-05, atol=1e-08, equal_nan=False):
)
)
- # repr ====================================================================
- def _get_cow_headers(self):
+ # SLICES ==================================================================
+
+ def __getitem__(self, slc):
+ """dm[slc] <==> dm.__getitem__(slc)."""
+ df = self._data_df.__getitem__(slc)
+ if isinstance(df, pd.Series):
+ df = df.to_frame()
+
+ dtypes = self._data_df.dtypes
+ dtypes = dtypes[dtypes.index.isin(df.columns)]
+
+ df = df.astype(dtypes)
+
+ objectives = self.objectives
+ objectives = objectives[objectives.index.isin(df.columns)].to_numpy()
+
+ weights = self.weights
+ weights = weights[weights.index.isin(df.columns)].to_numpy()
+
+ return DecisionMatrix(df, objectives, weights)
+
+ @property
+ def loc(self):
+ """Access a group of alternatives and criteria by label(s) or a \
+ boolean array.
+
+ ``.loc[]`` is primarily alternative label based, but may also be used
+ with a boolean array.
+
+ Unlike DataFrames, ``loc`` of ``DecisionMatrix`` always returns an
+ instance of ``DecisionMatrix``.
+
+ """
+ return _Loc("loc", self._data_df.loc, self.objectives, self.weights)
+
+ @property
+ def iloc(self):
+ """Purely integer-location based indexing for selection by position.
+
+ ``.iloc[]`` is primarily integer position based (from ``0`` to
+ ``length-1`` of the axis), but may also be used with a boolean
+ array.
+
+ Unlike DataFrames, ``iloc`` of ``DecisionMatrix`` always returns an
+ instance of ``DecisionMatrix``.
+
+ """
+ return _Loc("iloc", self._data_df.iloc, self.objectives, self.weights)
+
+ # REPR ====================================================================
+
+ def _get_cow_headers(
+ self, only=None, fmt="{criteria}[{objective}{weight}]"
+ ):
"""Columns names with COW (Criteria, Objective, Weight)."""
+ criteria = self._data_df.columns
+ objectives = self.objectives
+ weights = self.weights
+
+ if only:
+ mask = self._data_df.columns.isin(only)
+ criteria = criteria[mask]
+ objectives = objectives[mask]
+ weights = weights[mask]
+
+ weights = pd_fmt.format_array(weights, None)
+
headers = []
- fmt_weights = pd_fmt.format_array(self.weights, None)
- for c, o, w in zip(self.criteria, self.objectives, fmt_weights):
- header = f"{c}[{o.to_string()}{w}]"
+ for crit, obj, weight in zip(criteria, objectives, weights):
+ header = fmt.format(
+ criteria=crit, objective=obj.to_symbol(), weight=weight
+ )
headers.append(header)
- return headers
+ return np.array(headers)
def _get_axc_dimensions(self):
"""Dimension foote with AxC (Alternativs x Criteria)."""
@@ -711,26 +756,9 @@ def __repr__(self):
header = self._get_cow_headers()
dimensions = self._get_axc_dimensions()
- max_rows = pd.get_option("display.max_rows")
- min_rows = pd.get_option("display.min_rows")
- max_cols = pd.get_option("display.max_columns")
- max_colwidth = pd.get_option("display.max_colwidth")
-
- width = (
- pd.io.formats.console.get_console_size()[0]
- if pd.get_option("display.expand_frame_repr")
- else None
- )
-
- original_string = self._data_df.to_string(
- max_rows=max_rows,
- min_rows=min_rows,
- max_cols=max_cols,
- line_width=width,
- max_colwidth=max_colwidth,
- show_dimensions=False,
- header=header,
- )
+ with df_temporal_header(self._data_df, header) as df:
+ with pd.option_context("display.show_dimensions", False):
+ original_string = repr(df)
# add dimension
string = f"{original_string}\n[{dimensions}]"
@@ -742,12 +770,13 @@ def _repr_html_(self):
Mainly for IPython notebook.
"""
- header = dict(zip(self.criteria, self._get_cow_headers()))
+ header = self._get_cow_headers()
dimensions = self._get_axc_dimensions()
# retrieve the original string
- with pd.option_context("display.show_dimensions", False):
- original_html = self._data_df._repr_html_()
+ with df_temporal_header(self._data_df, header) as df:
+ with pd.option_context("display.show_dimensions", False):
+ original_html = df._repr_html_()
# add dimension
html = (
@@ -757,13 +786,7 @@ def _repr_html_(self):
""
)
- # now we need to change the table header
- d = pq.PyQuery(html)
- for th in d("div.decisionmatrix table.dataframe > thead > tr > th"):
- crit = th.text
- th.text = header.get(crit, crit)
-
- return str(d)
+ return html
# =============================================================================
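
The new ``__getitem__``, ``loc`` and ``iloc`` always return ``DecisionMatrix`` objects carrying the matching objectives and weights, never bare pandas objects. A sketch of the new slicing behaviour with a toy matrix (not part of the diff):

    import skcriteria as skc

    dm = skc.mkdm(
        matrix=[[1, 2, 3], [4, 5, 6]],
        objectives=[max, max, min],
        weights=[0.5, 0.3, 0.2],
        alternatives=["A0", "A1"],
        criteria=["c0", "c1", "c2"],
    )

    sub = dm[["c0", "c2"]]  # criteria selection keeps objectives and weights
    row = dm.loc[["A0"]]    # label-based selection of alternatives
    one = dm.iloc[0]        # positional; a Series is promoted back to a matrix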
diff --git a/skcriteria/core/dominance.py b/skcriteria/core/dominance.py
index 6c584da..28629c7 100644
--- a/skcriteria/core/dominance.py
+++ b/skcriteria/core/dominance.py
@@ -81,7 +81,7 @@ def _cache_read(self, a0, a1):
# FRAME ALT VS ALT ========================================================
- def _create_frame(self, compute_cell):
+ def _create_frame(self, compute_cell, iname, cname):
"""Create a data frame comparing two alternatives.
The value of each cell is calculated with the "compute_cell"
@@ -95,7 +95,13 @@ def _create_frame(self, compute_cell):
for a1 in alternatives:
row[a1] = compute_cell(a0, a1)
rows.append(row)
- return pd.DataFrame(rows, index=alternatives)
+
+ df = pd.DataFrame(rows, index=alternatives)
+
+ df.index.name = iname
+ df.columns.name = cname
+
+ return df
def bt(self):
"""Compare on how many criteria one alternative is better than another.
@@ -116,7 +122,9 @@ def compute_cell(a0, a1):
centry, ckreverted = self._cache_read(a0, a1)
return centry.aDb if not ckreverted else centry.bDa
- return self._create_frame(compute_cell)
+ return self._create_frame(
+ compute_cell, iname="Better than", cname="Worse than"
+ )
def eq(self):
"""Compare on how many criteria two alternatives are equal.
@@ -136,7 +144,9 @@ def compute_cell(a0, a1):
centry, _ = self._cache_read(a0, a1)
return centry.eq
- return self._create_frame(compute_cell)
+ return self._create_frame(
+ compute_cell, iname="Equals to", cname="Equals to"
+ )
def dominance(self, *, strict=False):
"""Compare if one alternative dominates or strictly dominates another \
@@ -176,7 +186,15 @@ def compute_cell(a0, a1):
return performance_a0 > 0 and performance_a1 == 0
- return self._create_frame(compute_cell)
+ iname, cname = (
+ ("Strict dominators", "Strictly dominated")
+ if strict
+ else ("Dominators", "Dominated")
+ )
+
+ dom = self._create_frame(compute_cell, iname=iname, cname=cname)
+
+ return dom
# COMPARISONS =============================================================
@@ -238,7 +256,7 @@ def compare(self, a0, a1):
return df
- # The dominated============================================================
+ # The dominated ===========================================================
def dominated(self, *, strict=False):
"""Which alternative is dominated or strictly dominated by at least \
@@ -257,7 +275,10 @@ def dominated(self, *, strict=False):
by at least one other alternative.
"""
- return self.dominance(strict=strict).any()
+ dom = self.dominance(strict=strict).any()
+ dom.name = dom.index.name
+ dom.index.name = "Alternatives"
+ return dom
@functools.lru_cache(maxsize=None)
def dominators_of(self, a, *, strict=False):
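With the names attached above, the comparison frames become self-describing. A quick sketch of the expected labels (the matrix and alternative names are illustrative):

from skcriteria import mkdm

dm = mkdm(
    matrix=[[1, 0, 3], [0, 5, 6]],
    objectives=[max, max, max],
    alternatives=["A0", "A1"],
)

bt = dm.dominance.bt()
print(bt.index.name, "/", bt.columns.name)  # Better than / Worse than

dom = dm.dominance.dominated()
print(dom.index.name, "/", dom.name)  # Alternatives / Dominators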
diff --git a/skcriteria/core/methods.py b/skcriteria/core/methods.py
index 7974544..639d5bf 100644
--- a/skcriteria/core/methods.py
+++ b/skcriteria/core/methods.py
@@ -19,8 +19,6 @@
import copy
import inspect
-from .data import DecisionMatrix
-from ..utils import doc_inherit
# =============================================================================
# BASE DECISION MAKER CLASS
@@ -88,7 +86,7 @@ def __repr__(self):
parameters.append(f"{pname}={repr(pvalue)}")
str_parameters = ", ".join(parameters)
- return f"{cls_name}({str_parameters})"
+ return f"<{cls_name} [{str_parameters}]>"
def get_parameters(self):
"""Return the parameters of the method as dictionary."""
@@ -116,149 +114,8 @@ def copy(self, **kwargs):
"""
asdict = self.get_parameters()
+
asdict.update(kwargs)
cls = type(self)
return cls(**asdict)
-
-
-# =============================================================================
-# SKCTransformer ABC
-# =============================================================================
-
-
-class SKCTransformerABC(SKCMethodABC):
- """Abstract class for all transformer in scikit-criteria."""
-
- _skcriteria_dm_type = "transformer"
- _skcriteria_abstract_class = True
-
- @abc.abstractmethod
- def _transform_data(self, **kwargs):
- """Apply the transformation logic to the decision matrix parameters.
-
- Parameters
- ----------
- kwargs:
- The decision matrix as separated parameters.
-
- Returns
- -------
- :py:class:`dict`
- A dictionary with all the values of the decision matrix
- transformed.
-
- """
- raise NotImplementedError()
-
- def transform(self, dm):
- """Perform transformation on `dm`.
-
- Parameters
- ----------
- dm: :py:class:`skcriteria.data.DecisionMatrix`
- The decision matrix to transform.
-
- Returns
- -------
- :py:class:`skcriteria.data.DecisionMatrix`
- Transformed decision matrix.
-
- """
- data = dm.to_dict()
-
- transformed_data = self._transform_data(**data)
-
- transformed_dm = DecisionMatrix.from_mcda_data(**transformed_data)
-
- return transformed_dm
-
-
-class SKCMatrixAndWeightTransformerABC(SKCTransformerABC):
- """Transform weights and matrix together or independently.
-
- The Transformer that implements this abstract class can be configured to
- transform
- `weights`, `matrix` or `both` so only that part of the DecisionMatrix
- is altered.
-
- This abstract class require to redefine ``_transform_weights`` and
- ``_transform_matrix``, instead of ``_transform_data``.
-
- """
-
- _skcriteria_abstract_class = True
- _skcriteria_parameters = ["target"]
-
- _TARGET_WEIGHTS = "weights"
- _TARGET_MATRIX = "matrix"
- _TARGET_BOTH = "both"
-
- def __init__(self, target):
- if target not in (
- self._TARGET_MATRIX,
- self._TARGET_WEIGHTS,
- self._TARGET_BOTH,
- ):
- raise ValueError(
- f"'target' can only be '{self._TARGET_WEIGHTS}', "
- f"'{self._TARGET_MATRIX}' or '{self._TARGET_BOTH}', "
- f"found '{target}'"
- )
- self._target = target
-
- @property
- def target(self):
- """Determine which part of the DecisionMatrix will be transformed."""
- return self._target
-
- @abc.abstractmethod
- def _transform_weights(self, weights):
- """Execute the transform method over the weights.
-
- Parameters
- ----------
- weights: :py:class:`numpy.ndarray`
- The weights to transform.
-
- Returns
- -------
- :py:class:`numpy.ndarray`
- The transformed weights.
-
- """
- raise NotImplementedError()
-
- @abc.abstractmethod
- def _transform_matrix(self, matrix):
- """Execute the transform method over the matrix.
-
- Parameters
- ----------
- matrix: :py:class:`numpy.ndarray`
- The decision matrix to transform
-
- Returns
- -------
- :py:class:`numpy.ndarray`
- The transformed matrix.
-
- """
- raise NotImplementedError()
-
- @doc_inherit(SKCTransformerABC._transform_data)
- def _transform_data(self, matrix, weights, **kwargs):
- transformed_mtx = matrix
- transformed_weights = weights
-
- if self._target in (self._TARGET_MATRIX, self._TARGET_BOTH):
- transformed_mtx = self._transform_matrix(matrix)
-
- if self._target in (self._TARGET_WEIGHTS, self._TARGET_BOTH):
- transformed_weights = self._transform_weights(weights)
-
- kwargs.update(
- matrix=transformed_mtx, weights=transformed_weights, dtypes=None
- )
-
- return kwargs
diff --git a/skcriteria/core/objectives.py b/skcriteria/core/objectives.py
new file mode 100644
index 0000000..f07fe56
--- /dev/null
+++ b/skcriteria/core/objectives.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Definition of the objectives (MIN, MAX) for the criteria."""
+
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+import enum
+
+import numpy as np
+
+from ..utils import deprecated
+
+
+# =============================================================================
+# CONSTANTS
+# =============================================================================
+
+
+class Objective(enum.Enum):
+ """Representation of criteria objectives (Minimize, Maximize)."""
+
+ #: Internal representation of minimize criteria
+ MIN = -1
+
+ #: Internal representation of maximize criteria
+ MAX = 1
+
+ # INTERNALS ===============================================================
+
+ _MIN_STR = "\u25bc" # â–¼
+ _MAX_STR = "\u25b2" # â–²
+
+    #: Other ways to name the maximization criteria.
+ _MAX_ALIASES = frozenset(
+ [
+ MAX,
+ _MAX_STR,
+ max,
+ np.max,
+ np.nanmax,
+ np.amax,
+ "max",
+ "maximize",
+ "+",
+ ">",
+ ]
+ )
+
+    #: Other ways to name the minimization criteria.
+ _MIN_ALIASES = frozenset(
+ [
+ MIN,
+ _MIN_STR,
+ min,
+ np.min,
+ np.nanmin,
+ np.amin,
+ "min",
+ "minimize",
+ "-",
+ "<",
+ ]
+ )
+
+ # CUSTOM CONSTRUCTOR ======================================================
+
+ @classmethod
+ def from_alias(cls, alias):
+ """Return a n objective instase based on some given alias."""
+ if isinstance(alias, cls):
+ return alias
+ if isinstance(alias, str):
+ alias = alias.lower()
+ if alias in cls._MAX_ALIASES.value:
+ return cls.MAX
+ if alias in cls._MIN_ALIASES.value:
+ return cls.MIN
+ raise ValueError(f"Invalid criteria objective {alias}")
+
+ # METHODS =================================================================
+
+ def __str__(self):
+ """Convert the objective to an string."""
+ return self.name
+
+ def to_symbol(self):
+ """Return the printable symbol representation of the objective."""
+ if self.value in Objective._MIN_ALIASES.value:
+ return Objective._MIN_STR.value
+ if self.value in Objective._MAX_ALIASES.value:
+ return Objective._MAX_STR.value
+
+ # DEPRECATED ==============================================================
+
+ @classmethod
+ @deprecated(reason="Use ``Objective.from_alias()`` instead.", version=0.8)
+ def construct_from_alias(cls, alias):
+ """Return an objective instance based on some given alias."""
+ return cls.from_alias(alias)
+
+ @deprecated(reason="Use ``MAX/MIN.to_symbol()`` instead.", version=0.8)
+ def to_string(self):
+ """Return the printable representation of the objective."""
+ return self.to_symbol()
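A short usage sketch of the alias resolution (all of these resolve to the same two members):

import numpy as np

from skcriteria.core.objectives import Objective

assert Objective.from_alias(max) is Objective.MAX
assert Objective.from_alias("maximize") is Objective.MAX
assert Objective.from_alias(np.min) is Objective.MIN
assert Objective.from_alias("<") is Objective.MIN

print(str(Objective.MAX), Objective.MAX.to_symbol())  # MAX ▲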
diff --git a/skcriteria/core/plot.py b/skcriteria/core/plot.py
index b207b6d..1d74ec4 100644
--- a/skcriteria/core/plot.py
+++ b/skcriteria/core/plot.py
@@ -15,18 +15,21 @@
# IMPORTS
# =============================================================================
-import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
import seaborn as sns
+from .objectives import Objective
from ..utils import AccessorABC
# =============================================================================
# PLOTTER OBJECT
# =============================================================================
+
+
class DecisionMatrixPlotter(AccessorABC):
- """Make plots of DecisionMatrix.
+ """DecisionMatrix plot utilities.
Kind of plot to produce:
@@ -45,6 +48,8 @@ class DecisionMatrixPlotter(AccessorABC):
- 'ogive' : criteria empirical cumulative distribution plot.
- 'wogive' : weights empirical cumulative distribution plot.
- 'area' : criteria area plot.
+ - 'dominance': the dominance matrix as a heatmap.
+ - 'frontier': criteria pair-wise Pareto-Frontier.
"""
@@ -56,31 +61,28 @@ def __init__(self, dm):
# PRIVATE =================================================================
# This method are used "a lot" inside all the different plots, so we can
# save some lines of code
+ def _get_criteria_labels(self, **kwargs):
+ kwargs.setdefault("fmt", "{criteria} {objective}")
+ labels = self._dm._get_cow_headers(**kwargs)
+ return pd.Series(labels, name="Criteria")
@property
def _ddf(self):
# proxy to access the dataframe with the data
- return self._dm.matrix
+ ddf = self._dm.matrix
+ ddf.columns = self._get_criteria_labels()
+ return ddf
@property
def _wdf(self):
# proxy to access the dataframe with the weights
- return self._dm.weights.to_frame()
-
- @property
- def _criteria_labels(self):
- # list with all the criteria + objectives
- dm = self._dm
- labels = [
- f"{c} {o.to_string()}" for c, o in zip(dm.criteria, dm.objectives)
- ]
- return labels
+ wdf = self._dm.weights.to_frame()
+ wdf.index = self._get_criteria_labels()
+ return wdf
# HEATMAP =================================================================
def _heatmap(self, df, **kwargs):
- kwargs.setdefault("annot", True)
- kwargs.setdefault("cmap", plt.cm.get_cmap())
ax = sns.heatmap(df, **kwargs)
return ax
@@ -89,7 +91,7 @@ def heatmap(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.heatmap``.
@@ -98,10 +100,8 @@ def heatmap(self, **kwargs):
matplotlib.axes.Axes or numpy.ndarray of them
"""
+ kwargs.setdefault("annot", True)
ax = self._heatmap(self._ddf, **kwargs)
- ax.set_xticklabels(self._criteria_labels)
- ax.set_ylabel("Alternatives")
- ax.set_xlabel("Criteria")
return ax
def wheatmap(self, **kwargs):
@@ -109,7 +109,7 @@ def wheatmap(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.heatmap``.
@@ -118,13 +118,11 @@ def wheatmap(self, **kwargs):
matplotlib.axes.Axes or numpy.ndarray of them
"""
+ kwargs.setdefault("annot", True)
ax = self._heatmap(self._wdf.T, **kwargs)
- ax.set_xticklabels(self._criteria_labels)
- ax.set_xlabel("Criteria")
-
if "ax" not in kwargs:
# if the ax is provided by the user we assume that the figure
- # is already setted to the expected size. If it's not we resize the
+        # already has the expected size. If it's not, we resize the
# height to 1/5 of the original size.
fig = ax.get_figure()
size = fig.get_size_inches() / [1, 5]
@@ -145,7 +143,7 @@ def bar(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``DataFrame.plot.bar``.
@@ -155,9 +153,6 @@ def bar(self, **kwargs):
"""
ax = self._ddf.plot.bar(**kwargs)
- ax.set_xlabel("Alternatives")
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
def wbar(self, **kwargs):
@@ -171,7 +166,7 @@ def wbar(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``DataFrame.plot.bar``.
@@ -181,8 +176,6 @@ def wbar(self, **kwargs):
"""
ax = self._wdf.T.plot.bar(**kwargs)
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
# BARH ====================================================================
@@ -198,7 +191,7 @@ def barh(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``DataFrame.plot.barh``.
@@ -208,9 +201,6 @@ def barh(self, **kwargs):
"""
ax = self._ddf.plot.barh(**kwargs)
- ax.set_ylabel("Alternatives")
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
def wbarh(self, **kwargs):
@@ -224,7 +214,7 @@ def wbarh(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``DataFrame.plot.barh``.
@@ -234,8 +224,6 @@ def wbarh(self, **kwargs):
"""
ax = self._wdf.T.plot.barh(**kwargs)
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
# HIST ====================================================================
@@ -249,7 +237,7 @@ def hist(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.histplot``.
@@ -259,8 +247,6 @@ def hist(self, **kwargs):
"""
ax = sns.histplot(self._ddf, **kwargs)
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
def whist(self, **kwargs):
@@ -272,7 +258,7 @@ def whist(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.histplot``.
@@ -282,8 +268,6 @@ def whist(self, **kwargs):
"""
ax = sns.histplot(self._wdf.T, **kwargs)
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
# BOX =====================================================================
@@ -299,7 +283,7 @@ def box(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.boxplot``.
@@ -308,17 +292,7 @@ def box(self, **kwargs):
matplotlib.axes.Axes or numpy.ndarray of them
"""
- orient = kwargs.setdefault("orient", "v")
-
ax = sns.boxplot(data=self._ddf, **kwargs)
-
- if orient == "v":
- ax.set_xticklabels(self._criteria_labels)
- ax.set_xlabel("Criteria")
- elif orient == "h":
- ax.set_yticklabels(self._criteria_labels)
- ax.set_ylabel("Criteria")
-
return ax
def wbox(self, **kwargs):
@@ -332,7 +306,7 @@ def wbox(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.boxplot``.
@@ -359,7 +333,7 @@ def kde(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.kdeplot``.
@@ -369,8 +343,6 @@ def kde(self, **kwargs):
"""
ax = sns.kdeplot(data=self._ddf, **kwargs)
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
def wkde(self, **kwargs):
@@ -386,7 +358,7 @@ def wkde(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.kdeplot``.
@@ -416,7 +388,7 @@ def ogive(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.ecdfplot``.
@@ -426,8 +398,6 @@ def ogive(self, **kwargs):
"""
ax = sns.ecdfplot(data=self._ddf, **kwargs)
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
return ax
def wogive(self, **kwargs):
@@ -446,7 +416,7 @@ def wogive(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
``seaborn.ecdfplot``.
@@ -468,7 +438,7 @@ def area(self, **kwargs):
Parameters
----------
- **kwargs
+ **kwargs:
Additional keyword arguments are passed and are documented in
:meth:`DataFrame.plot.area`.
@@ -479,7 +449,179 @@ def area(self, **kwargs):
"""
ax = self._ddf.plot.area(**kwargs)
- ax.set_xlabel("Alternatives")
- if kwargs.get("legend", True):
- ax.legend(self._criteria_labels)
+ return ax
+
+ # DOMINANCE ===============================================================
+
+ def dominance(self, *, strict=False, **kwargs):
+ """Plot dominance as a color-encoded matrix.
+
+ In order to evaluate the dominance of an alternative *a0* over an
+ alternative *a1*, the algorithm evaluates that *a0* is better in at
+ least one criterion and that *a1* is not better in any criterion than
+ *a0*. In the case that ``strict = True`` it also evaluates that there
+ are no equal criteria.
+
+ Parameters
+ ----------
+ strict: bool, default ``False``
+ If True, strict dominance is evaluated.
+ **kwargs:
+ Additional keyword arguments are passed and are documented in
+ ``seaborn.heatmap``.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ """
+        dm = self._dm
+
+ dom = dm.dominance.dominance(strict=strict)
+ bt = dm.dominance.bt().to_numpy().astype(str)
+ eq = dm.dominance.eq().to_numpy().astype(str)
+
+ annot = kwargs.pop("annot", True)
+ if annot:
+ annot = ""
+ for elem in [r"$\succ", bt, "$/$=", eq, "$"]:
+ annot = np.char.add(annot, elem)
+
+ kwargs.setdefault("cbar", False)
+ kwargs.setdefault("fmt", "")
+ ax = self._heatmap(dom, annot=annot, **kwargs)
+
+ return ax
+
+ def frontier(
+ self,
+ x,
+ y,
+ *,
+ strict=False,
+ ax=None,
+ legend=True,
+ scatter_kws=None,
+ line_kws=None,
+ ):
+ """Pareto frontier on two arbitrarily selected criteria.
+
+        A selection of alternatives $A_o$ is a Pareto-optimal solution when
+        there is no other selection that improves on one objective without
+        worsening at least one of the others.
+
+ From this point of view, the concept is used to analyze the possible
+ optimal options of a solution given a variety of objectives or desires
+ and one or more evaluation criteria.
+
+ Given a "universe" of alternatives, one seeks to determine the set that
+ are Pareto efficient (i.e., those alternatives that satisfy the
+ condition of not being able to better satisfy one of those desires or
+ objectives without worsening some other). That set of optimal
+ alternatives establishes a "Pareto set" or the "Pareto Frontier".
+
+ The study of the solutions in the frontier allows designers to analyze
+ the possible alternatives within the established parameters, without
+ having to analyze the totality of possible solutions.
+
+ Parameters
+ ----------
+ x, y : str
+ Criteria names.
+ Variables that specify positions on the x and y axes.
+        strict: bool, default ``False``
+            If True, strict dominance is evaluated.
+        ax : :class:`matplotlib.axes.Axes`
+            Pre-existing axes for the plot. Otherwise, call
+            ``matplotlib.pyplot.gca`` internally.
+        legend : bool, default ``True``
+            If ``False``, no legend data is added and no legend is drawn.
+        scatter_kws: dict, default ``None``
+            Additional parameters passed to ``seaborn.scatterplot``.
+        line_kws: dict, default ``None``
+            Additional parameters passed to ``seaborn.lineplot``,
+            except for ``estimator`` and ``sort``.
+
+ Returns
+ -------
+ matplotlib.axes.Axes or numpy.ndarray of them
+
+ References
+ ----------
+ :cite:p:`enwiki:1107297090`
+ :cite:p:`enwiki:1110412520`
+
+ """
+        # cut the decision matrix down to only the necessary criteria
+ sdm = self._dm[[x, y]]
+
+ # extract the matrix
+ df = sdm.matrix
+
+ # draw the scatterplot ================================================
+ scatter_kws = {} if scatter_kws is None else scatter_kws
+ scatter_kws.setdefault("ax", ax)
+ scatter_kws.setdefault("legend", legend)
+ ax = sns.scatterplot(x=x, y=y, data=df, hue=df.index, **scatter_kws)
+
+ # draw the frontier ===================================================
+ # Get the non dominated alternatives.
+ # This alternatives create the frontier
+ non_dominated = df[
+ ~sdm.dominance.dominated(strict=strict)
+ ].sort_values([x, y])
+
+        # if only one alternative is on the frontier but there are more
+        # alternatives, we draw a limit around all the dominated ones.
+ if len(non_dominated) == 1 and len(sdm.alternatives) > 1:
+ non_dominated = pd.concat([non_dominated] * 3, ignore_index=True)
+
+        # the frontier endpoints change if x or y are to be minimized
+ obj_x, obj_y = sdm.objectives
+
+ non_dominated.iloc[0, 0] = (
+ df[x].min() if obj_x is Objective.MAX else df[x].max()
+ )
+ non_dominated.iloc[2, 1] = (
+ df[y].min() if obj_y is Objective.MAX else df[y].max()
+ )
+
+ # line style and frontier label
+ frontier_ls, frontier_lb = (
+ ("-", "Strict frontier") if strict else ("--", "Frontier")
+ )
+
+ # draw the line plot
+ line_kws = {} if line_kws is None else line_kws
+ line_kws.setdefault("alpha", 0.5)
+ line_kws.setdefault("linestyle", frontier_ls)
+ line_kws.setdefault("label", frontier_lb)
+ line_kws.setdefault("legend", legend)
+
+ sns.lineplot(
+ x=x,
+ y=y,
+ data=non_dominated,
+ estimator=None,
+ sort=False,
+ ax=ax,
+ **line_kws,
+ )
+
+ # Set the labels
+ xlabel, ylabel = self._get_criteria_labels(only=[x, y])
+ ax.set_xlabel(xlabel)
+ ax.set_ylabel(ylabel)
+
+ if legend:
+ handles, labels = ax.get_legend_handles_labels()
+ ax.legend(handles, labels, title="Alternatives")
+
return ax
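A usage sketch of the two new plot kinds, using the stock-selection dataset added below (the criteria names come from that loader):

import matplotlib.pyplot as plt

from skcriteria.datasets import load_simple_stock_selection

dm = load_simple_stock_selection()

# dominance matrix as an annotated heatmap
dm.plot.dominance(strict=True)

# pair-wise Pareto frontier between return (max) and risk (min)
dm.plot.frontier(x="ROE", y="RI")
plt.show()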
diff --git a/skcriteria/core/stats.py b/skcriteria/core/stats.py
index 44ccf11..09defb9 100644
--- a/skcriteria/core/stats.py
+++ b/skcriteria/core/stats.py
@@ -29,24 +29,24 @@ class DecisionMatrixStatsAccessor(AccessorABC):
Kind of statistic to produce:
- 'corr' : Compute pairwise correlation of columns, excluding
- NA/null values.
+ NA/null values.
- 'cov' : Compute pairwise covariance of columns, excluding NA/null
- values.
+ values.
- 'describe' : Generate descriptive statistics.
- 'kurtosis' : Return unbiased kurtosis over requested axis.
- 'mad' : Return the mean absolute deviation of the values over the
- requested axis.
+ requested axis.
- 'max' : Return the maximum of the values over the requested axis.
- 'mean' : Return the mean of the values over the requested axis.
- 'median' : Return the median of the values over the requested
- axis.
+ axis.
- 'min' : Return the minimum of the values over the requested axis.
- 'pct_change' : Percentage change between the current and a prior
- element.
+ element.
- 'quantile' : Return values at the given quantile over requested
- axis.
+ axis.
- 'sem' : Return unbiased standard error of the mean over requested
- axis.
+ axis.
- 'skew' : Return unbiased skew over requested axis.
- 'std' : Return sample standard deviation over requested axis.
- 'var' : Return unbiased variance over requested axis.
@@ -59,7 +59,6 @@ class DecisionMatrixStatsAccessor(AccessorABC):
"cov",
"describe",
"kurtosis",
- "mad",
"max",
"mean",
"median",
@@ -88,3 +87,17 @@ def __dir__(self):
return super().__dir__() + [
e for e in dir(self._dm._data_df) if e in self._DF_WHITELIST
]
+
+ def mad(self, axis=0, skipna=True):
+ """Return the mean absolute deviation of the values over a given axis.
+
+ Parameters
+ ----------
+ axis : int
+ Axis for the function to be applied on.
+ skipna : bool, default True
+ Exclude NA/null values when computing the result.
+
+ """
+ df = self._dm._data_df
+ return (df - df.mean(axis=axis)).abs().mean(axis=axis, skipna=skipna)
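The accessor now computes the mean absolute deviation itself, presumably because recent pandas versions deprecate ``DataFrame.mad``. The formula is just the mean of the absolute deviations around the column means:

import pandas as pd

df = pd.DataFrame({"c0": [1.0, 2.0, 3.0], "c1": [2.0, 2.0, 8.0]})

# mean absolute deviation around each column mean
mad = (df - df.mean(axis=0)).abs().mean(axis=0)
print(mad.to_dict())  # {'c0': 0.666..., 'c1': 2.666...}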
diff --git a/skcriteria/datasets/__init__.py b/skcriteria/datasets/__init__.py
new file mode 100644
index 0000000..0efbe86
--- /dev/null
+++ b/skcriteria/datasets/__init__.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""The :mod:`skcriteria.datasets` module includes utilities to load \
+datasets."""
+
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+import json
+import os
+import pathlib
+
+from ..core import mkdm
+
+from .. import core
+
+# =============================================================================
+# CONSTANTS
+# =============================================================================
+
+_PATH = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+
+def load_simple_stock_selection():
+ """Simple stock selection decision matrix.
+
+    This matrix was designed primarily for teaching and for evaluating the
+    behavior of experiments.
+
+ Among the data we can find: two maximization criteria (ROE, CAP),
+ one minimization criterion (RI), dominated alternatives (FX), and
+ one alternative with an outlier criterion (ROE, MM = 1).
+
+    Although the criteria and alternatives are original to the authors of
+    Scikit-Criteria, the numerical values were extracted at some point from a
+    source we no longer remember.
+
+ Description:
+
+    In order to decide which stocks to buy, a company studied six candidate
+    investments: PE, JN, AA, FX, MM and GN. The finance department decided to
+ consider the following criteria for selection:
+
+ 1. ROE (Max): Return % for each monetary unit invested.
+ 2. CAP (Max): Years of market capitalization.
+ 3. RI (Min): Risk of the stock.
+
+ """
+ dm = core.mkdm(
+ matrix=[
+ [7, 5, 35],
+ [5, 4, 26],
+ [5, 6, 28],
+ [3, 4, 36],
+ [1, 7, 30],
+ [5, 8, 30],
+ ],
+ objectives=[max, max, min],
+ weights=[2, 4, 1],
+ alternatives=["PE", "JN", "AA", "FX", "MM", "GN"],
+ criteria=["ROE", "CAP", "RI"],
+ )
+ return dm
+
+
+def load_van2021evaluation(windows_size=7):
+ r"""Dataset extracted from from historical time series cryptocurrencies.
+
+ This dataset is extracted from::
+
+ Van Heerden, N., Cabral, J. y Luczywo, N. (2021). Evaluación de la
+ importancia de criterios para la selección de criptomonedas.
+ XXXIV ENDIO - XXXII EPIO Virtual 2021, Argentina.
+
+    The nine available alternatives are based on the ranking of the 20
+    cryptocurrencies with the largest market capitalization, calculated on
+    the basis of circulating supply, according to information retrieved from
+    "Cryptocurrency Historical Prices" on July 21st, 2021. From there, only
+    the coins with complete data between October 9th, 2018 and July 6th,
+    2021 were kept, excluding stable-coins, since they maintain a stable
+    price and therefore do not carry associated yields. The alternatives
+    that met these requirements turned out to be: Cardano (ADA), Binance
+    coin (BNB), Bitcoin (BTC), Dogecoin (DOGE), Ethereum (ETH),
+    Chainlink (LINK), Litecoin (LTC), Stellar (XLM) and Ripple (XRP).
+
+ Two decision matrices were created for two sizes of overlapping moving
+ windows: 7 and 15 days. Six criteria were defined on these windows that
+ seek to represent returns and risks:
+
+    - ``xRV`` - average window return (:math:`\bar{x}RV`) - Maximize: is the
+ average of the differences between the closing price of the
+ cryptocurrency on the last day and the first day of each window, divided
+ by the price on the first day.
+ - ``sRV`` - window return deviation (:math:`sRV`) - Minimize: is the
+ standard deviation of window return. The greater the deviation, the
+ returns within the windows have higher variance and are unstable.
+ - ``xVV`` - average of the volume of the window (:math:`\bar{x}VV`) -
+ Maximize: it is the average of the summations of the transaction amount
+ of the cryptocurrency in dollars in each window, representing a liquidity
+ measure of the asset.
+ - ``sVV`` - window volume deviation (:math:`sVV`) - Minimize: it is the
+ deviation of the window volumes. The greater the deviation, the volumes
+ within the windows have higher variance and are unstable.
+ - ``xR2`` - mean of the correlation coefficient (:math:`\bar{x}R^2`) -
+ Maximize: it is the mean of the :math:`R^2` of the fit of the linear
+ trends with respect to the data. It is a measure that defines how well it
+ explains that linear trend to the data within the window.
+ - ``xm`` - mean of the slope (:math:`\bar{x}m`) - Maximize: it is the mean
+ of the slope of the linear trend between the closing prices in dollars
+ and the volumes traded in dollars of the cryptocurrency within each
+ window.
+
+ Parameters
+ ----------
+    windows_size: 7 or 15, default 7
+        Whether the decision matrix should be based on 7- or 15-day
+        overlapping moving windows.
+
+
+ References
+ ----------
+ :cite:p:`van2021evaluation`
+ :cite:p:`van2021epio_evaluation`
+ :cite:p:`rajkumar_2021`
+
+ """
+ paths = {
+ 7: _PATH / "van2021evaluation" / "windows_size_7.json",
+ 15: _PATH / "van2021evaluation" / "windows_size_15.json",
+ }
+
+ path = paths.get(windows_size)
+ if path is None:
+ raise ValueError(
+ f"Windows size must be '7' or '15'. Found {windows_size!r}"
+ )
+
+ with open(path) as fp:
+ data = json.load(fp)
+
+ return mkdm(**data)
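Usage sketch for the two loaders:

from skcriteria.datasets import (
    load_simple_stock_selection,
    load_van2021evaluation,
)

dm = load_simple_stock_selection()
print(dm.alternatives)  # ['PE' 'JN' 'AA' 'FX' 'MM' 'GN']

dm7 = load_van2021evaluation(windows_size=7)
dm15 = load_van2021evaluation(windows_size=15)

# any other window size is rejected
try:
    load_van2021evaluation(windows_size=30)
except ValueError as err:
    print(err)  # Windows size must be '7' or '15'. Found 30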
diff --git a/skcriteria/datasets/van2021evaluation/windows_size_15.json b/skcriteria/datasets/van2021evaluation/windows_size_15.json
new file mode 100644
index 0000000..cbbe34a
--- /dev/null
+++ b/skcriteria/datasets/van2021evaluation/windows_size_15.json
@@ -0,0 +1,109 @@
+{
+ "matrix": [
+ [
+ 0.072,
+ 0.274,
+ 17440000000.0,
+ 32880000000.0,
+ 0.281,
+ 3.806e-11
+ ],
+ [
+ 0.087,
+ 0.348,
+ 13160000000.0,
+ 23330000000.0,
+ 0.339,
+ 1.195e-08
+ ],
+ [
+ 0.036,
+ 0.159,
+ 450200000000.0,
+ 289400000000.0,
+ 0.237,
+ 2.192e-08
+ ],
+ [
+ 0.153,
+ 0.805,
+ 17770000000.0,
+ 52850000000.0,
+ 0.314,
+ 2.441e-12
+ ],
+ [
+ 0.055,
+ 0.213,
+ 214500000000.0,
+ 169500000000.0,
+ 0.239,
+ 2.52e-09
+ ],
+ [
+ 0.097,
+ 0.302,
+ 14440000000.0,
+ 27920000000.0,
+ 0.277,
+ 2.544e-09
+ ],
+ [
+ 0.034,
+ 0.207,
+ 54150000000.0,
+ 35570000000.0,
+ 0.28,
+ 2.679e-09
+ ],
+ [
+ 0.031,
+ 0.275,
+ 8951000000.0,
+ 11040000000.0,
+ 0.276,
+ 2.454e-11
+ ],
+ [
+ 0.037,
+ 0.292,
+ 49660000000.0,
+ 59500000000.0,
+ 0.26,
+ 9.236e-12
+ ]
+ ],
+ "objectives": [
+        1,
+        -1,
+        1,
+        -1,
+        1,
+        1
+ ],
+ "weights": [
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "alternatives": [
+ "ADA",
+ "BNB",
+ "BTC",
+ "DOGE",
+ "ETH",
+ "LINK",
+ "LTC",
+ "XLM",
+ "XRP"
+ ],
+ "criteria": [
+ "xRV",
+ "sRV",
+ "xVV",
+ "sVV",
+ "xR2",
+ "xm"
+ ]
+}
\ No newline at end of file
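The JSON keys map one-to-one onto the keyword arguments of ``mkdm``. Note that objectives may be encoded either as the integers 1/-1 (as in this file) or as the string aliases used in the 7-day file below, since both forms are resolved through ``Objective.from_alias``. A minimal sketch with illustrative data:

from skcriteria import mkdm

data = {
    "matrix": [[1, 2], [3, 4]],
    "objectives": [1, -1],  # equivalent to ["max", "min"]
    "weights": [1.0, 1.0],
    "alternatives": ["A0", "A1"],
    "criteria": ["c0", "c1"],
}
dm = mkdm(**data)
print(dm.objectives)  # [MAX, MIN]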
diff --git a/skcriteria/datasets/van2021evaluation/windows_size_7.json b/skcriteria/datasets/van2021evaluation/windows_size_7.json
new file mode 100644
index 0000000..06f9950
--- /dev/null
+++ b/skcriteria/datasets/van2021evaluation/windows_size_7.json
@@ -0,0 +1,111 @@
+{
+ "objectives": [
+ "max",
+ "min",
+ "max",
+ "min",
+ "max",
+ "max"
+ ],
+ "weights": [
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "alternatives": [
+ "ADA",
+ "BNB",
+ "BTC",
+ "DOGE",
+ "ETH",
+ "LINK",
+ "LTC",
+ "XLM",
+ "XRP"
+ ],
+ "criteria": [
+ "xRV",
+ "sRV",
+ "xVV",
+ "sVV",
+ "xR2",
+ "xm"
+ ],
+ "matrix": [
+ [
+ 0.029,
+ 0.156,
+ 8144000000.0,
+ 15860000000.0,
+ 0.312,
+ 1.821e-11
+ ],
+ [
+ 0.033,
+ 0.167,
+ 6141000000.0,
+ 11180000000.0,
+ 0.396,
+ 9.167e-09
+ ],
+ [
+ 0.015,
+ 0.097,
+ 209500000000.0,
+ 138800000000.0,
+ 0.281,
+ 1.254e-08
+ ],
+ [
+ 0.057,
+ 0.399,
+ 8287000000.0,
+ 27260000000.0,
+ 0.327,
+ 1.459e-12
+ ],
+ [
+ 0.023,
+ 0.127,
+ 100000000000.0,
+ 80540000000.0,
+ 0.313,
+ 1.737e-09
+ ],
+ [
+ 0.04,
+ 0.179,
+ 6707000000.0,
+ 16650000000.0,
+ 0.319,
+ 1.582e-09
+ ],
+ [
+ 0.015,
+ 0.134,
+ 25130000000.0,
+ 17310000000.0,
+ 0.32,
+ 1.816e-09
+ ],
+ [
+ 0.013,
+ 0.176,
+ 4157000000.0,
+ 5469000000.0,
+ 0.321,
+ 1.876e-11
+ ],
+ [
+ 0.014,
+ 0.164,
+ 23080000000.0,
+ 29240000000.0,
+ 0.322,
+ 7.996e-12
+ ]
+ ]
+}
\ No newline at end of file
diff --git a/skcriteria/madm/__init__.py b/skcriteria/madm/__init__.py
index 13b30fe..d8a26a2 100644
--- a/skcriteria/madm/__init__.py
+++ b/skcriteria/madm/__init__.py
@@ -11,10 +11,25 @@
"""MCDA methods."""
-from ._base import KernelResult, RankResult, ResultABC, SKCDecisionMakerABC
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+from ._madm_base import (
+ KernelResult,
+ RankResult,
+ ResultABC,
+ SKCDecisionMakerABC,
+)
# =============================================================================
# ALL
# =============================================================================
-__all__ = ["KernelResult", "RankResult", "ResultABC", "SKCDecisionMakerABC"]
+__all__ = [
+ "KernelResult",
+ "RankResult",
+ "ResultABC",
+ "SKCDecisionMakerABC",
+]
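Since the package re-exports the same names, the rename is invisible to downstream code:

# public imports are unchanged by the _base -> _madm_base rename
from skcriteria.madm import (
    KernelResult,
    RankResult,
    ResultABC,
    SKCDecisionMakerABC,
)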
diff --git a/skcriteria/madm/_base.py b/skcriteria/madm/_madm_base.py
similarity index 61%
rename from skcriteria/madm/_base.py
rename to skcriteria/madm/_madm_base.py
index 8419e48..3484c5d 100644
--- a/skcriteria/madm/_base.py
+++ b/skcriteria/madm/_madm_base.py
@@ -97,22 +97,23 @@ class ResultABC(metaclass=abc.ABCMeta):
"""
- _skcriteria_result_column = None
+ _skcriteria_result_series = None
def __init_subclass__(cls):
"""Validate if the subclass are well formed."""
- result_column = cls._skcriteria_result_column
+ result_column = cls._skcriteria_result_series
if result_column is None:
- raise TypeError(f"{cls} must redefine '_skcriteria_result_column'")
+ raise TypeError(f"{cls} must redefine '_skcriteria_result_series'")
def __init__(self, method, alternatives, values, extra):
self._validate_result(values)
self._method = str(method)
self._extra = Bunch("extra", extra)
- self._result_df = pd.DataFrame(
+ self._result_series = pd.Series(
values,
- index=alternatives,
- columns=[self._skcriteria_result_column],
+ index=pd.Index(alternatives, name="Alternatives", copy=True),
+ name=self._skcriteria_result_series,
+ copy=True,
)
@abc.abstractmethod
@@ -127,7 +128,7 @@ def values(self):
The i-th value refers to the valuation of the i-th. alternative.
"""
- return self._result_df[self._skcriteria_result_column].to_numpy()
+ return self._result_series.to_numpy(copy=True)
@property
def method(self):
@@ -137,7 +138,7 @@ def method(self):
@property
def alternatives(self):
"""Names of the alternatives evaluated."""
- return self._result_df.index.to_numpy()
+ return self._result_series.index.to_numpy(copy=True)
@property
def extra_(self):
@@ -152,16 +153,24 @@ def extra_(self):
e_ = extra_
+ # UTILS ===================================================================
+
+ def to_series(self):
+ """The result as `pandas.Series`."""
+ series = self._result_series.copy(deep=True)
+ series.index = self._result_series.index.copy(deep=True)
+ return series
+
# CMP =====================================================================
@property
def shape(self):
- """Tuple with (number_of_alternatives, number_of_alternatives).
+ """Tuple with (number_of_alternatives, ).
rank.shape <==> np.shape(rank)
"""
- return np.shape(self._result_df)
+ return np.shape(self._result_series)
def __len__(self):
"""Return the number ot alternatives.
@@ -169,18 +178,119 @@ def __len__(self):
rank.__len__() <==> len(rank).
"""
- return len(self._result_df)
+ return len(self._result_series)
- def equals(self, other):
+ def values_equals(self, other):
"""Check if the alternatives and ranking are the same.
The method doesn't check the method or the extra parameters.
"""
return (self is other) or (
- isinstance(other, RankResult)
- and self._result_df.equals(other._result_df)
+ isinstance(other, type(self))
+ and self._result_series.equals(other._result_series)
+ )
+
+ def aequals(self, other, rtol=1e-05, atol=1e-08, equal_nan=False):
+ """Return True if the result are equal within a tolerance.
+
+ The tolerance values are positive, typically very small numbers. The
+ relative difference (`rtol` * abs(`b`)) and the absolute difference
+ `atol` are added together to compare against the absolute difference
+ between `a` and `b`.
+
+ NaNs are treated as equal if they are in the same place and if
+ ``equal_nan=True``. Infs are treated as equal if they are in the same
+ place and of the same sign in both arrays.
+
+        The comparison proceeds as follows:
+
+        - If ``other`` is the same object, return ``True``.
+        - If ``other`` is not a result of the same type, or its alternatives
+          or values differ, return ``False``.
+        - Next, check the ``extra_`` attributes, comparing numeric arrays
+          with the provided tolerance.
+
+ Parameters
+ ----------
+ other : Result
+ Other result to compare.
+ rtol : float
+ The relative tolerance parameter
+ (see Notes in :py:func:`numpy.allclose`).
+ atol : float
+ The absolute tolerance parameter
+ (see Notes in :py:func:`numpy.allclose`).
+ equal_nan : bool
+ Whether to compare NaN's as equal. If True, NaN's in dm will be
+ considered equal to NaN's in `other` in the output array.
+
+ Returns
+ -------
+        aequals : :py:class:`bool`
+ Returns True if the two result are equal within the given
+ tolerance; False otherwise.
+
+ See Also
+ --------
+ equals, :py:func:`numpy.isclose`, :py:func:`numpy.all`,
+ :py:func:`numpy.any`, :py:func:`numpy.equal`,
+ :py:func:`numpy.allclose`.
+
+ """
+ if self is other:
+ return True
+ is_veq = self.values_equals(other) and set(self._extra) == set(
+ other._extra
+        )
+ keys = set(self._extra)
+ while is_veq and keys:
+ k = keys.pop()
+ sv = self._extra[k]
+ ov = other._extra[k]
+ if isinstance(ov, np.ndarray):
+ is_veq = is_veq and np.allclose(
+ sv,
+ ov,
+ rtol=rtol,
+ atol=atol,
+ equal_nan=equal_nan,
+ )
+ else:
+ is_veq = is_veq and sv == ov
+ return is_veq
+
+ def equals(self, other):
+ """Return True if the results are equal.
+
+        This method calls ``aequals`` without tolerance.
+
+ Parameters
+ ----------
+        other : Result
+ Other instance to compare.
+
+ Returns
+ -------
+        equals : :py:class:`bool`
+ Returns True if the two results are equals.
+
+ See Also
+ --------
+ aequals, :py:func:`numpy.isclose`, :py:func:`numpy.all`,
+ :py:func:`numpy.any`, :py:func:`numpy.equal`,
+ :py:func:`numpy.allclose`.
+
+ """
+ return self.aequals(other, 0, 0, False)
+
+ def __eq__(self, other):
+ """x.__eq__(y) <==> x == y."""
+ return self.equals(other)
+
+ def __ne__(self, other):
+ """x.__eq__(y) <==> x == y."""
+ return not self == other
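A sketch of the new comparison semantics (method name and values are illustrative): exact equality goes through ``equals``/``==``, tolerant comparison of the extras through ``aequals``.

import numpy as np

from skcriteria.madm import RankResult

r1 = RankResult(
    "TOPSIS", ["A0", "A1"], [1, 2], {"score": np.array([0.8, 0.2])}
)
r2 = RankResult(
    "TOPSIS", ["A0", "A1"], [1, 2], {"score": np.array([0.8 + 1e-9, 0.2])}
)

print(r1 == r2)        # False: the extras differ exactly
print(r1.aequals(r2))  # True: the extras differ within tolerance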
# REPR ====================================================================
@@ -189,7 +299,7 @@ def __repr__(self):
kwargs = {"show_dimensions": False}
# retrieve the original string
- df = self._result_df.T
+ df = self._result_series.to_frame().T
original_string = df.to_string(**kwargs)
# add dimension
@@ -197,6 +307,26 @@ def __repr__(self):
return string
+ def _repr_html_(self):
+ """Return a html representation for a particular result.
+
+ Mainly for IPython notebook.
+
+ """
+ df = self._result_series.to_frame().T
+ original_html = df.style._repr_html_()
+ rtype = self._skcriteria_result_series.lower()
+
+ # add metadata
+ html = (
+ f"\n"
+ f"{original_html}"
+ f"Method: {self.method}\n"
+ "
"
+ )
+
+ return html
+
@doc_inherit(ResultABC, warn_class=False)
class RankResult(ResultABC):
@@ -207,7 +337,7 @@ class RankResult(ResultABC):
"""
- _skcriteria_result_column = "Rank"
+ _skcriteria_result_series = "Rank"
@doc_inherit(ResultABC._validate_result)
def _validate_result(self, values):
@@ -248,24 +378,16 @@ def untied_rank_(self):
return np.argsort(self.rank_) + 1
return self.rank_
- def _repr_html_(self):
- """Return a html representation for a particular result.
-
- Mainly for IPython notebook.
-
- """
- df = self._result_df.T
- original_html = df.style._repr_html_()
-
- # add metadata
- html = (
- "\n"
- f"{original_html}"
- f"Method: {self.method}\n"
- "
"
- )
-
- return html
+ def to_series(self, *, untied=False):
+ """The result as `pandas.Series`."""
+ if untied:
+ return pd.Series(
+ self.untied_rank_,
+ index=self._result_series.index.copy(deep=True),
+ copy=True,
+ name="Untied rank",
+ )
+ return super().to_series()
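Usage sketch of ``to_series``; with tied ranks, the untied variant breaks ties by order of appearance (via ``np.argsort``):

from skcriteria.madm import RankResult

rank = RankResult("WSM", ["A0", "A1", "A2"], [1, 1, 2], {})

print(rank.to_series().to_dict())             # {'A0': 1, 'A1': 1, 'A2': 2}
print(rank.to_series(untied=True).to_dict())  # {'A0': 1, 'A1': 2, 'A2': 3}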
@doc_inherit(ResultABC, warn_class=False)
@@ -277,7 +399,7 @@ class KernelResult(ResultABC):
"""
- _skcriteria_result_column = "Kernel"
+ _skcriteria_result_series = "Kernel"
@doc_inherit(ResultABC._validate_result)
def _validate_result(self, values):
@@ -301,7 +423,7 @@ def kernel_where_(self):
@property
@deprecated(
- reason=("Use 'kernel_where_' instead"),
+ reason=("Use ``kernel_where_`` instead"),
version=0.7,
)
def kernelwhere_(self):
@@ -311,23 +433,6 @@ def kernelwhere_(self):
@property
def kernel_alternatives_(self):
"""Return the names of alternatives in the kernel."""
- return self._result_df.index[self._result_df.Kernel].to_numpy()
-
- def _repr_html_(self):
- """Return a html representation for a particular result.
-
- Mainly for IPython notebook.
-
- """
- df = self._result_df.T
- original_html = df._repr_html_()
-
- # add metadata
- html = (
- "\n"
- f"{original_html}"
- f"Method: {self.method}\n"
- "
"
+ return self._result_series.index[self._result_series].to_numpy(
+ copy=True
+        )
-
- return html
diff --git a/skcriteria/madm/electre.py b/skcriteria/madm/electre.py
index a9cb532..c2c52db 100644
--- a/skcriteria/madm/electre.py
+++ b/skcriteria/madm/electre.py
@@ -33,9 +33,9 @@
from scipy import stats
-from ._base import KernelResult, RankResult, SKCDecisionMakerABC
+from ._madm_base import KernelResult, RankResult, SKCDecisionMakerABC
from ..core import Objective
-from ..utils import doc_inherit
+from ..utils import doc_inherit, will_change
# =============================================================================
@@ -214,17 +214,18 @@ def weights_outrank(matrix, weights, objectives):
for a0_idx, a1_idx in alt_combs:
- # sacamos las alternativas
+ # select the two alternatives to compare
a0, a1 = matrix[[a0_idx, a1_idx]]
- # vemos donde hay maximos y donde hay minimos estrictos
+        # find where there are strict maxima and minima
maxs, mins = (a0 > a1), (a0 < a1)
- # armamos los vectores de a \succ b teniendo en cuenta los objetivs
+ # we assemble the vectors of a \succ b taking the
+ # objectives into account
a0_s_a1 = np.where(objectives == Objective.MAX.value, maxs, mins)
a1_s_a0 = np.where(objectives == Objective.MAX.value, mins, maxs)
- # sacamos ahora los criterios
+        # a0 outranks a1 if its weighted support is at least as large
outrank[a0_idx, a1_idx] = np.sum(weights * a0_s_a1) >= np.sum(
weights * a1_s_a0
)
@@ -297,6 +298,10 @@ def _electre2_ranker(
return ranking
+@will_change(
+ reason="electre2 implementation will change in version after 0.8",
+ version=0.8,
+)
def electre2(
matrix, objectives, weights, p0=0.65, p1=0.5, p2=0.35, q0=0.65, q1=0.35
):
@@ -319,7 +324,7 @@ def electre2(
# TODO: remove loops
- # calculo del ranking directo
+ # calculation of direct and indirect ranking
ranking_direct = _electre2_ranker(
alt_n, outrank_s, outrank_w, invert_ranking=False
@@ -345,8 +350,12 @@ def electre2(
)
+@will_change(
+ reason="ELECTRE2 implementation will change in version after 0.8",
+ version=0.8,
+)
class ELECTRE2(SKCDecisionMakerABC):
- """Find the rankin solution through ELECTRE-2.
+ """Find the ranking solution through ELECTRE-2.
ELECTRE II was proposed by Roy and Bertier (1971-1973) to overcome ELECTRE
I's inability to produce a ranking of alternatives. Instead of simply
@@ -389,12 +398,12 @@ def __init__(self, *, p0=0.65, p1=0.5, p2=0.35, q0=0.65, q1=0.35):
if not (1 >= p0 >= p1 >= p2 >= 0):
raise ValueError(
"Condition '1 >= p0 >= p1 >= p2 >= 0' must be fulfilled. "
- "Found: p0={p0}, p1={p1} p2={p2}.'"
+ f"Found: p0={p0}, p1={p1} p2={p2}.'"
)
if not (1 >= q0 >= q1 >= 0):
raise ValueError(
"Condition '1 >= q0 >= q1 >= 0' must be fulfilled. "
- "Found: q0={q0}, q1={q1}.'"
+ f"Found: q0={q0}, q1={q1}.'"
)
self._p0, self._p1, self._p2, self._q0, self._q1 = (p0, p1, p2, q0, q1)
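With the f-string fix, the message now interpolates the offending values; a sketch:

from skcriteria.madm.electre import ELECTRE2

try:
    ELECTRE2(p0=0.2, p1=0.5, p2=0.35)  # violates 1 >= p0 >= p1 >= p2 >= 0
except ValueError as err:
    print(err)  # ... Found: p0=0.2, p1=0.5 p2=0.35.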
diff --git a/skcriteria/madm/moora.py b/skcriteria/madm/moora.py
index 4280ab2..cefe12b 100644
--- a/skcriteria/madm/moora.py
+++ b/skcriteria/madm/moora.py
@@ -20,7 +20,7 @@
import numpy as np
-from ._base import RankResult, SKCDecisionMakerABC
+from ._madm_base import RankResult, SKCDecisionMakerABC
from ..core import Objective
from ..utils import doc_inherit, rank
diff --git a/skcriteria/madm/similarity.py b/skcriteria/madm/similarity.py
index b3e60b1..82f867f 100644
--- a/skcriteria/madm/similarity.py
+++ b/skcriteria/madm/similarity.py
@@ -21,42 +21,11 @@
from scipy.spatial import distance
-from ._base import RankResult, SKCDecisionMakerABC
+from ._madm_base import RankResult, SKCDecisionMakerABC
from ..core import Objective
from ..utils import doc_inherit, rank
-# =============================================================================
-# CONSTANTS
-# =============================================================================
-
-_VALID_DISTANCES_METRICS = [
- "braycurtis",
- "canberra",
- "chebyshev",
- "cityblock",
- "correlation",
- "cosine",
- "dice",
- "euclidean",
- "hamming",
- "jaccard",
- "jensenshannon",
- "kulsinski",
- "mahalanobis",
- "matching",
- "minkowski",
- "rogerstanimoto",
- "russellrao",
- "seuclidean",
- "sokalmichener",
- "sokalsneath",
- "sqeuclidean",
- "wminkowski",
- "yule",
-]
-
-
# =============================================================================
# TOPSIS
# =============================================================================
@@ -72,8 +41,10 @@ def topsis(matrix, objectives, weights, metric="euclidean", **kwargs):
maxs = np.max(wmtx, axis=0)
# create the ideal and the anti ideal arrays
- ideal = np.where(objectives == Objective.MAX.value, maxs, mins)
- anti_ideal = np.where(objectives == Objective.MIN.value, maxs, mins)
+ where_max = np.equal(objectives, Objective.MAX.value)
+
+ ideal = np.where(where_max, maxs, mins)
+ anti_ideal = np.where(where_max, mins, maxs)
# calculate distances
d_better = distance.cdist(
@@ -136,8 +107,8 @@ class TOPSIS(SKCDecisionMakerABC):
def __init__(self, *, metric="euclidean"):
- if not callable(metric) and metric not in _VALID_DISTANCES_METRICS:
- metrics = ", ".join(f"'{m}'" for m in _VALID_DISTANCES_METRICS)
+ if not callable(metric) and metric not in distance._METRICS_NAMES:
+ metrics = ", ".join(f"'{m}'" for m in distance._METRICS_NAMES)
raise ValueError(
f"Invalid metric '{metric}'. Plese choose from: {metrics}"
)
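Delegating the metric check to SciPy keeps the list in sync with the installed version, at the cost of relying on the private ``distance._METRICS_NAMES``. Usage sketch:

from skcriteria.madm.similarity import TOPSIS

TOPSIS(metric="cityblock")  # any metric known to SciPy is accepted

try:
    TOPSIS(metric="not-a-metric")
except ValueError as err:
    print(err)  # lists the metrics provided by the installed SciPy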
diff --git a/skcriteria/madm/simple.py b/skcriteria/madm/simple.py
index 1f1a90a..c1508f8 100644
--- a/skcriteria/madm/simple.py
+++ b/skcriteria/madm/simple.py
@@ -18,7 +18,7 @@
import numpy as np
-from ._base import RankResult, SKCDecisionMakerABC
+from ._madm_base import RankResult, SKCDecisionMakerABC
from ..core import Objective
from ..utils import doc_inherit, rank
@@ -85,6 +85,8 @@ def _evaluate_data(self, matrix, weights, objectives, **kwargs):
raise ValueError(
"WeightedSumModel can't operate with minimize objective"
)
+ if np.any(matrix < 0):
+ raise ValueError("WeightedSumModel can't operate with values < 0")
rank, score = wsm(matrix, weights)
return rank, {"score": score}
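A sketch of the new guard; WeightedSumModel already rejected minimization objectives and now also rejects negative matrix values:

from skcriteria import mkdm
from skcriteria.madm.simple import WeightedSumModel

dm = mkdm(matrix=[[1, -2], [3, 4]], objectives=[max, max])

try:
    WeightedSumModel().evaluate(dm)
except ValueError as err:
    print(err)  # WeightedSumModel can't operate with values < 0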
diff --git a/skcriteria/madm/simus.py b/skcriteria/madm/simus.py
index fb3115b..e1e9028 100644
--- a/skcriteria/madm/simus.py
+++ b/skcriteria/madm/simus.py
@@ -20,7 +20,7 @@
import numpy as np
-from ._base import RankResult, SKCDecisionMakerABC
+from ._madm_base import RankResult, SKCDecisionMakerABC
from ..core import Objective
from ..preprocessing.scalers import scale_by_sum
from ..utils import doc_inherit, lp, rank
diff --git a/skcriteria/pipeline.py b/skcriteria/pipeline.py
index 9eb7a81..7e6ebf9 100644
--- a/skcriteria/pipeline.py
+++ b/skcriteria/pipeline.py
@@ -15,10 +15,8 @@
# IMPORTS
# =============================================================================
-from collections import Counter
-
from .core import SKCMethodABC
-from .utils import Bunch
+from .utils import Bunch, unique_names
# =============================================================================
@@ -34,10 +32,7 @@ class SKCPipeline(SKCMethodABC):
The final decision-maker only needs to implement `evaluate`.
The purpose of the pipeline is to assemble several steps that can be
- applied together while setting different parameters. A step's
- estimator may be replaced entirely by setting the parameter with its name
- to another dmaker or a transformer removed by setting it to
- `'passthrough'` or `None`.
+ applied together while setting different parameters.
Parameters
----------
@@ -61,34 +56,7 @@ def __init__(self, steps):
self._validate_steps(steps)
self._steps = steps
- @property
- def steps(self):
- """List of steps of the pipeline."""
- return list(self._steps)
-
- def __len__(self):
- """Return the length of the Pipeline."""
- return len(self.steps)
-
- def __getitem__(self, ind):
- """Return a sub-pipeline or a single step in the pipeline.
-
- Indexing with an integer will return an step; using a slice
- returns another Pipeline instance which copies a slice of this
- Pipeline. This copy is shallow: modifying steps in the sub-pipeline
- will affect the larger pipeline and vice-versa.
- However, replacing a value in `step` will not affect a copy.
-
- """
- if isinstance(ind, slice):
- if ind.step not in (1, None):
- raise ValueError("Pipeline slicing only supports a step of 1")
- return self.__class__(self.steps[ind])
- elif isinstance(ind, int):
- return self.steps[ind][-1]
- elif isinstance(ind, str):
- return self.named_steps[ind]
- raise KeyError(ind)
+ # INTERNALS ===============================================================
def _validate_steps(self, steps):
for name, step in steps[:-1]:
@@ -107,6 +75,13 @@ def _validate_steps(self, steps):
f"step '{name}' must implement 'evaluate()' method"
)
+ # PROPERTIES ==============================================================
+
+ @property
+ def steps(self):
+ """List of steps of the pipeline."""
+ return list(self._steps)
+
@property
def named_steps(self):
"""Dictionary-like object, with the following attributes.
@@ -117,6 +92,35 @@ def named_steps(self):
"""
return Bunch("steps", dict(self.steps))
+ # DUNDERS =================================================================
+
+ def __len__(self):
+ """Return the length of the Pipeline."""
+ return len(self._steps)
+
+ def __getitem__(self, ind):
+ """Return a sub-pipeline or a single step in the pipeline.
+
+ Indexing with an integer will return an step; using a slice
+ returns another Pipeline instance which copies a slice of this
+ Pipeline. This copy is shallow: modifying steps in the sub-pipeline
+ will affect the larger pipeline and vice-versa.
+ However, replacing a value in `step` will not affect a copy.
+
+ """
+ if isinstance(ind, slice):
+ if ind.step not in (1, None):
+ cname = type(self).__name__
+ raise ValueError(f"{cname} slicing only supports a step of 1")
+ return self.__class__(self.steps[ind])
+ elif isinstance(ind, int):
+ return self.steps[ind][-1]
+ elif isinstance(ind, str):
+ return self.named_steps[ind]
+ raise KeyError(ind)
+
+ # API =====================================================================
+
def evaluate(self, dm):
"""Run the all the transformers and the decision maker.
@@ -157,34 +161,10 @@ def transform(self, dm):
# =============================================================================
-# FUNCTIONS
+# FACTORY
# =============================================================================
-def _name_steps(steps):
- """Generate names for steps."""
- # Based on sklearn.pipeline._name_estimators
-
- steps = list(reversed(steps))
-
- names = [type(step).__name__.lower() for step in steps]
-
- name_count = {k: v for k, v in Counter(names).items() if v > 1}
-
- named_steps = []
- for name, step in zip(names, steps):
- count = name_count.get(name, 0)
- if count:
- name_count[name] = count - 1
- name = f"{name}_{count}"
-
- named_steps.append((name, step))
-
- named_steps.reverse()
-
- return named_steps
-
-
def mkpipe(*steps):
"""Construct a Pipeline from the given transformers and decision-maker.
@@ -201,8 +181,9 @@ def mkpipe(*steps):
Returns
-------
p : SKCPipeline
- Returns a scikit-learn :class:`SKCPipeline` object.
+ Returns a scikit-criteria :class:`SKCPipeline` object.
"""
- named_steps = _name_steps(steps)
+ names = [type(step).__name__.lower() for step in steps]
+ named_steps = unique_names(names=names, elements=steps)
return SKCPipeline(named_steps)
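Step naming is now delegated to ``skcriteria.utils.unique_names``. Assuming it suffixes repeated class names (the exact scheme lives in utils), a sketch:

from skcriteria.madm.simple import WeightedSumModel
from skcriteria.pipeline import mkpipe
from skcriteria.preprocessing.scalers import SumScaler

pipe = mkpipe(
    SumScaler(target="weights"),
    SumScaler(target="matrix"),
    WeightedSumModel(),
)
print([name for name, _ in pipe.steps])
# e.g. ['sumscaler_1', 'sumscaler_2', 'weightedsummodel'];
# the exact suffix format depends on unique_names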
diff --git a/skcriteria/preprocessing/__init__.py b/skcriteria/preprocessing/__init__.py
index 24c1611..0542020 100644
--- a/skcriteria/preprocessing/__init__.py
+++ b/skcriteria/preprocessing/__init__.py
@@ -10,3 +10,19 @@
# =============================================================================
"""Multiple data transformation routines."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+from ._preprocessing_base import (
+ SKCMatrixAndWeightTransformerABC,
+ SKCTransformerABC,
+)
+
+
+# =============================================================================
+# ALL
+# =============================================================================
+
+__all__ = ["SKCTransformerABC", "SKCMatrixAndWeightTransformerABC"]
diff --git a/skcriteria/preprocessing/_preprocessing_base.py b/skcriteria/preprocessing/_preprocessing_base.py
new file mode 100644
index 0000000..9dc7f74
--- /dev/null
+++ b/skcriteria/preprocessing/_preprocessing_base.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Core functionalities to create transformers."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+import abc
+
+from ..core import DecisionMatrix, SKCMethodABC
+from ..utils import doc_inherit
+
+
+# =============================================================================
+# SKCTransformer ABC
+# =============================================================================
+
+
+class SKCTransformerABC(SKCMethodABC):
+ """Abstract class for all transformer in scikit-criteria."""
+
+ _skcriteria_dm_type = "transformer"
+ _skcriteria_abstract_class = True
+
+ @abc.abstractmethod
+ def _transform_data(self, **kwargs):
+ """Apply the transformation logic to the decision matrix parameters.
+
+ Parameters
+ ----------
+ kwargs:
+ The decision matrix as separated parameters.
+
+ Returns
+ -------
+ :py:class:`dict`
+ A dictionary with all the values of the decision matrix
+ transformed.
+
+ """
+ raise NotImplementedError()
+
+ def transform(self, dm):
+ """Perform transformation on `dm`.
+
+ Parameters
+ ----------
+ dm: :py:class:`skcriteria.data.DecisionMatrix`
+ The decision matrix to transform.
+
+ Returns
+ -------
+ :py:class:`skcriteria.data.DecisionMatrix`
+ Transformed decision matrix.
+
+ """
+ data = dm.to_dict()
+
+ transformed_data = self._transform_data(**data)
+
+ transformed_dm = DecisionMatrix.from_mcda_data(**transformed_data)
+
+ return transformed_dm
+
+
+# =============================================================================
+# MATRIX & WEIGHTS TRANSFORMER
+# =============================================================================
+
+
+class SKCMatrixAndWeightTransformerABC(SKCTransformerABC):
+ """Transform weights and matrix together or independently.
+
+    The transformer that implements this abstract class can be configured to
+    transform `weights`, `matrix` or `both`, so only that part of the
+    DecisionMatrix is altered.
+
+    This abstract class requires ``_transform_weights`` and
+    ``_transform_matrix`` to be redefined, instead of ``_transform_data``.
+
+ """
+
+ _skcriteria_abstract_class = True
+ _skcriteria_parameters = ["target"]
+
+ _TARGET_WEIGHTS = "weights"
+ _TARGET_MATRIX = "matrix"
+ _TARGET_BOTH = "both"
+
+ def __init__(self, target):
+ if target not in (
+ self._TARGET_MATRIX,
+ self._TARGET_WEIGHTS,
+ self._TARGET_BOTH,
+ ):
+ raise ValueError(
+ f"'target' can only be '{self._TARGET_WEIGHTS}', "
+ f"'{self._TARGET_MATRIX}' or '{self._TARGET_BOTH}', "
+ f"found '{target}'"
+ )
+ self._target = target
+
+ @property
+ def target(self):
+ """Determine which part of the DecisionMatrix will be transformed."""
+ return self._target
+
+ @abc.abstractmethod
+ def _transform_weights(self, weights):
+ """Execute the transform method over the weights.
+
+ Parameters
+ ----------
+ weights: :py:class:`numpy.ndarray`
+ The weights to transform.
+
+ Returns
+ -------
+ :py:class:`numpy.ndarray`
+ The transformed weights.
+
+ """
+ raise NotImplementedError()
+
+ @abc.abstractmethod
+ def _transform_matrix(self, matrix):
+ """Execute the transform method over the matrix.
+
+ Parameters
+ ----------
+ matrix: :py:class:`numpy.ndarray`
+ The decision matrix to transform
+
+ Returns
+ -------
+ :py:class:`numpy.ndarray`
+ The transformed matrix.
+
+ """
+ raise NotImplementedError()
+
+ @doc_inherit(SKCTransformerABC._transform_data)
+ def _transform_data(self, matrix, weights, **kwargs):
+ transformed_mtx = matrix
+ transformed_weights = weights
+
+ if self._target in (self._TARGET_MATRIX, self._TARGET_BOTH):
+ transformed_mtx = self._transform_matrix(matrix)
+
+ if self._target in (self._TARGET_WEIGHTS, self._TARGET_BOTH):
+ transformed_weights = self._transform_weights(weights)
+
+ kwargs.update(
+ matrix=transformed_mtx, weights=transformed_weights, dtypes=None
+ )
+
+ return kwargs
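
Review note (illustrative, not part of the patch): a minimal sketch of a
concrete transformer built on the new base class. ``NegateScaler`` is a
hypothetical name; the inherited ``__init__(target)`` and
``_skcriteria_parameters = ["target"]`` are reused unchanged.

    >>> import numpy as np
    >>> from skcriteria.preprocessing._preprocessing_base import (
    ...     SKCMatrixAndWeightTransformerABC,
    ... )
    >>> class NegateScaler(SKCMatrixAndWeightTransformerABC):
    ...     """Flip the sign of the matrix, the weights, or both."""
    ...     def _transform_weights(self, weights):
    ...         # flip the sign of every weight
    ...         return -np.asarray(weights, dtype=float)
    ...     def _transform_matrix(self, matrix):
    ...         # flip the sign of every matrix entry
    ...         return -np.asarray(matrix, dtype=float)
    >>> NegateScaler(target="both").target
    'both'
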
diff --git a/skcriteria/preprocessing/distance.py b/skcriteria/preprocessing/distance.py
index 8ad7aca..254ff43 100644
--- a/skcriteria/preprocessing/distance.py
+++ b/skcriteria/preprocessing/distance.py
@@ -9,90 +9,43 @@
# DOCS
# =============================================================================
-"""Normalization through the distance to distance function."""
+"""Normalization through the distance to distance function.
+
+This entire module is deprecated.
+
+"""
# =============================================================================
# IMPORTS
# =============================================================================
-import numpy as np
+from . import scalers
+from ..utils import deprecated, doc_inherit
-from ..core import Objective, SKCTransformerABC
-from ..utils import doc_inherit
# =============================================================================
-# FUNCTIONS
+# CENIT DISTANCE
# =============================================================================
+_skc_prep_scalers = "skcriteria.preprocessing.scalers"
-def cenit_distance(matrix, objectives):
- r"""Calculate a scores with respect to an ideal and anti-ideal alternative.
-
- For every criterion :math:`f` of this multicriteria problem we define a
- membership function :math:`x_j` mapping the values of :math:`f_j` to the
- interval [0, 1].
-
- The result score :math:`x_{aj}`expresses the degree to which the
- alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which
- is the best performance in criterion , and far from the anti-ideal value
- :math:`f_{j^*}`, which is the worst performance in criterion :math:`j`.
- Both ideal and anti-ideal, are achieved by at least one of the alternatives
- under consideration.
-
- .. math::
-
- x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}}
-
- """
- matrix = np.asarray(matrix, dtype=float)
-
- maxs = np.max(matrix, axis=0)
- mins = np.min(matrix, axis=0)
-
- where_max = np.equal(objectives, Objective.MAX.value)
-
- cenit = np.where(where_max, maxs, mins)
- nadir = np.where(where_max, mins, maxs)
-
- return (matrix - nadir) / (cenit - nadir)
+@deprecated(
+ reason=(
+ f"Use ``{_skc_prep_scalers}.matrix_scale_by_cenit_distance`` instead"
+ ),
+ version=0.8,
+)
+@doc_inherit(scalers.matrix_scale_by_cenit_distance)
+def cenit_distance(matrix, objectives):
+ return scalers.matrix_scale_by_cenit_distance(matrix, objectives)
-class CenitDistance(SKCTransformerABC):
- r"""Relative scores with respect to an ideal and anti-ideal alternative.
-
- For every criterion :math:`f` of this multicriteria problem we define a
- membership function :math:`x_j` mapping the values of :math:`f_j` to the
- interval [0, 1].
-
- The result score :math:`x_{aj}`expresses the degree to which the
- alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which
- is the best performance in criterion , and far from the anti-ideal value
- :math:`f_{j^*}`, which is the worst performance in criterion :math:`j`.
- Both ideal and anti-ideal, are achieved by at least one of the alternatives
- under consideration.
-
- .. math::
-
- x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}}
-
-
- References
- ----------
- :cite:p:`diakoulaki1995determining`
-
- """
-
- _skcriteria_parameters = []
-
- @doc_inherit(SKCTransformerABC._transform_data)
- def _transform_data(self, matrix, objectives, **kwargs):
-
- distance_mtx = cenit_distance(matrix, objectives)
-
- dtypes = np.full(np.shape(objectives), float)
- kwargs.update(
- matrix=distance_mtx, objectives=objectives, dtypes=dtypes
- )
- return kwargs
+@deprecated(
+ reason=f"Use ``{_skc_prep_scalers}.CenitDistanceMatrixScaler`` instead",
+ version=0.8,
+)
+@doc_inherit(scalers.CenitDistanceMatrixScaler, warn_class=False)
+class CenitDistance(scalers.CenitDistanceMatrixScaler):
+ ...
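
Review note (illustrative, not part of the patch): the deprecated
``cenit_distance`` now only forwards to the scalers module, so new code
should call the scaler function directly. A quick check of the formula on a
2x2 matrix, where objective ``1`` means MAX and ``-1`` means MIN:

    >>> import numpy as np
    >>> from skcriteria.preprocessing import scalers
    >>> mtx = np.array([[1.0, 2.0], [3.0, 4.0]])
    >>> scalers.matrix_scale_by_cenit_distance(mtx, objectives=[1, -1])
    array([[0., 1.],
           [1., 0.]])
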
diff --git a/skcriteria/preprocessing/filters.py b/skcriteria/preprocessing/filters.py
index fb418b9..ecf9b3b 100644
--- a/skcriteria/preprocessing/filters.py
+++ b/skcriteria/preprocessing/filters.py
@@ -21,7 +21,8 @@
import numpy as np
-from ..core import DecisionMatrix, SKCTransformerABC
+from ._preprocessing_base import SKCTransformerABC
+from ..core import DecisionMatrix
from ..utils import doc_inherit
# =============================================================================
@@ -486,12 +487,12 @@ class FilterNE(SKCArithmeticFilterABC):
@doc_inherit(SKCByCriteriaFilterABC, warn_class=False)
class SKCSetFilterABC(SKCByCriteriaFilterABC):
- """Provide a common behavior to make filters based on set operatopms.
+ """Provide a common behavior to make filters based on set operations.
This abstract class require to redefine ``_set_filter`` method, and this
will apply to each criteria separately.
- This class is designed to implement in general set comparision like
+ This class is designed to implement in general set comparison like
"inclusion" and "exclusion".
"""
diff --git a/skcriteria/preprocessing/impute.py b/skcriteria/preprocessing/impute.py
new file mode 100644
index 0000000..4445365
--- /dev/null
+++ b/skcriteria/preprocessing/impute.py
@@ -0,0 +1,475 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Module that provides multiple strategies for missing value imputation.
+
+The classes implemented here are a thin layer on top of the `sklearn.impute`
+module classes.
+
+"""
+
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+
+import abc
+
+import numpy as np
+
+import sklearn.impute as _sklimpute
+
+from ._preprocessing_base import SKCTransformerABC
+from ..utils import doc_inherit
+
+# =============================================================================
+# BASE CLASS
+# =============================================================================
+
+
+class SKCImputerABC(SKCTransformerABC):
+ """Abstract class capable of impute missing values of the matrix.
+
+    This abstract class requires the redefinition of ``_impute``, instead
+    of ``_transform_data``.
+
+ """
+
+ _skcriteria_abstract_class = True
+
+ @abc.abstractmethod
+ def _impute(self, matrix):
+ """Impute the missing values.
+
+ Parameters
+ ----------
+ matrix: :py:class:`numpy.ndarray`
+            The decision matrix whose missing values will be imputed.
+
+ Returns
+ -------
+ :py:class:`numpy.ndarray`
+ The imputed matrix.
+
+ """
+ raise NotImplementedError()
+
+ @doc_inherit(SKCTransformerABC._transform_data)
+ def _transform_data(self, matrix, **kwargs):
+ imputed_matrix = self._impute(matrix=matrix)
+ kwargs.update(matrix=imputed_matrix, dtypes=None)
+ return kwargs
+
+
+# =============================================================================
+# SIMPLE IMPUTER
+# =============================================================================
+
+
+class SimpleImputer(SKCImputerABC):
+ """Imputation transformer for completing missing values.
+
+ Internally this class uses the ``sklearn.impute.SimpleImputer`` class.
+
+ Parameters
+ ----------
+ missing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan
+ The placeholder for the missing values. All occurrences of
+ `missing_values` will be imputed.
+
+ strategy : str, default='mean'
+ The imputation strategy.
+
+ - If "mean", then replace missing values using the mean along
+ each column. Can only be used with numeric data.
+ - If "median", then replace missing values using the median along
+ each column. Can only be used with numeric data.
+ - If "most_frequent", then replace missing using the most frequent
+ value along each column. Can be used with strings or numeric data.
+ If there is more than one such value, only the smallest is returned.
+ - If "constant", then replace missing values with fill_value. Can be
+ used with strings or numeric data.
+
+ fill_value : str or numerical value, default=None
+ When strategy == "constant", fill_value is used to replace all
+ occurrences of missing_values.
+ If left to the default, fill_value will be 0.
+
+ """
+
+ _skcriteria_parameters = ["missing_values", "strategy", "fill_value"]
+
+ def __init__(
+ self,
+ *,
+ missing_values=np.nan,
+ strategy="mean",
+ fill_value=None,
+ ):
+ self._missing_values = missing_values
+ self._strategy = strategy
+ self._fill_value = fill_value
+
+ # PROPERTIES ==============================================================
+
+ @property
+ def missing_values(self):
+ """The placeholder for the missing values."""
+ return self._missing_values
+
+ @property
+ def strategy(self):
+ """The imputation strategy."""
+ return self._strategy
+
+ @property
+ def fill_value(self):
+ """Used to replace all occurrences of missing_values, \
+ when strategy == "constant"."""
+ return self._fill_value
+
+ # THE IMPUTATION LOGIC ====================================================
+
+ @doc_inherit(SKCImputerABC._impute)
+ def _impute(self, matrix):
+ imputer = _sklimpute.SimpleImputer(
+ missing_values=self._missing_values,
+ strategy=self._strategy,
+ fill_value=self._fill_value,
+ )
+ imputed_matrix = imputer.fit_transform(matrix)
+ return imputed_matrix
+
+
+# =============================================================================
+# MULTIVARIATE
+# =============================================================================
+
+
+class IterativeImputer(SKCImputerABC):
+ """Multivariate imputer that estimates each feature from all the others.
+
+ A strategy for imputing missing values by modeling each feature with
+ missing values as a function of other features in a round-robin fashion.
+
+ Internally this class uses the ``sklearn.impute.IterativeImputer`` class.
+
+ This estimator is still **experimental** for now: the predictions
+ and the API might change without any deprecation cycle. To use it,
+ you need to explicitly import `enable_iterative_imputer`::
+
+ >>> # explicitly require this experimental feature
+ >>> from sklearn.experimental import enable_iterative_imputer # noqa
+ >>> # now you can import normally from sklearn.impute
+    >>> from skcriteria.preprocessing.impute import IterativeImputer
+
+ Parameters
+ ----------
+ estimator : estimator object, default=BayesianRidge()
+ The estimator to use at each step of the round-robin imputation.
+ If `sample_posterior=True`, the estimator must support
+ `return_std` in its `predict` method.
+ missing_values : int or np.nan, default=np.nan
+ The placeholder for the missing values. All occurrences of
+ `missing_values` will be imputed.
+ sample_posterior : bool, default=False
+ Whether to sample from the (Gaussian) predictive posterior of the
+ fitted estimator for each imputation. Estimator must support
+ `return_std` in its `predict` method if set to `True`. Set to
+ `True` if using `IterativeImputer` for multiple imputations.
+ max_iter : int, default=10
+ Maximum number of imputation rounds to perform before returning the
+ imputations computed during the final round. A round is a single
+ imputation of each criteria with missing values. The stopping criterion
+ is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,
+ where `X_t` is `X` at iteration `t`. Note that early stopping is only
+ applied if `sample_posterior=False`.
+ tol : float, default=1e-3
+ Tolerance of the stopping condition.
+ n_nearest_criteria : int, default=None
+ Number of other criteria to use to estimate the missing values of
+ each criteria column. Nearness between criteria is measured using
+ the absolute correlation coefficient between each criteria pair (after
+ initial imputation). To ensure coverage of criteria throughout the
+ imputation process, the neighbor criteria are not necessarily nearest,
+ but are drawn with probability proportional to correlation for each
+ imputed target criteria. Can provide significant speed-up when the
+ number of criteria is huge. If `None`, all criteria will be used.
+ initial_strategy : {'mean', 'median', 'most_frequent', 'constant'}, \
+ default='mean'
+ Which strategy to use to initialize the missing values. Same as the
+ `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.
+ imputation_order : {'ascending', 'descending', 'roman', 'arabic', \
+ 'random'}, default='ascending'
+ The order in which the criteria will be imputed. Possible values:
+
+ - `'ascending'`: From criteria with fewest missing values to most.
+ - `'descending'`: From criteria with most missing values to fewest.
+ - `'roman'`: Left to right.
+ - `'arabic'`: Right to left.
+ - `'random'`: A random order for each round.
+
+ min_value : float or array-like of shape (n_criteria,), default=-np.inf
+ Minimum possible imputed value. Broadcast to shape `(n_criteria,)` if
+ scalar. If array-like, expects shape `(n_criteria,)`, one min value for
+ each criteria. The default is `-np.inf`.
+ max_value : float or array-like of shape (n_criteria,), default=np.inf
+ Maximum possible imputed value. Broadcast to shape `(n_criteria,)` if
+ scalar. If array-like, expects shape `(n_criteria,)`, one max value for
+ each criteria. The default is `np.inf`.
+ verbose : int, default=0
+ Verbosity flag, controls the debug messages that are issued
+ as functions are evaluated. The higher, the more verbose. Can be 0, 1,
+ or 2.
+ random_state : int, RandomState instance or None, default=None
+ The seed of the pseudo random number generator to use. Randomizes
+ selection of estimator criteria if `n_nearest_criteria` is not `None`,
+ the `imputation_order` if `random`, and the sampling from posterior if
+ `sample_posterior=True`. Use an integer for determinism.
+
+ """
+
+ _skcriteria_parameters = [
+ "estimator",
+ "missing_values",
+ "sample_posterior",
+ "max_iter",
+ "tol",
+ "n_nearest_criteria",
+ "initial_strategy",
+ "imputation_order",
+ "min_value",
+ "max_value",
+ "verbose",
+ "random_state",
+ ]
+
+ def __init__(
+ self,
+ estimator=None,
+ *,
+ missing_values=np.nan,
+ sample_posterior=False,
+ max_iter=10,
+ tol=1e-3,
+ n_nearest_criteria=None,
+ initial_strategy="mean",
+ imputation_order="ascending",
+ skip_complete=False,
+ min_value=-np.inf,
+ max_value=np.inf,
+ verbose=0,
+ random_state=None,
+ ):
+ self._estimator = estimator
+ self._missing_values = missing_values
+ self._sample_posterior = sample_posterior
+ self._max_iter = max_iter
+ self._tol = tol
+ self._n_nearest_criteria = n_nearest_criteria
+ self._initial_strategy = initial_strategy
+ self._imputation_order = imputation_order
+ self._skip_complete = skip_complete
+ self._min_value = min_value
+ self._max_value = max_value
+ self._verbose = verbose
+ self._random_state = random_state
+
+ # PROPERTIES ==============================================================
+
+ @property
+ def estimator(self):
+ """Used at each step of the round-robin imputation."""
+ return self._estimator
+
+ @property
+ def missing_values(self):
+ """The placeholder for the missing values."""
+ return self._missing_values
+
+ @property
+ def sample_posterior(self):
+ """Whether to sample from the (Gaussian) predictive posterior of the \
+ fitted estimator for each imputation."""
+ return self._sample_posterior
+
+ @property
+ def max_iter(self):
+ """Maximum number of imputation rounds."""
+ return self._max_iter
+
+ @property
+ def tol(self):
+ """Tolerance of the stopping condition."""
+ return self._tol
+
+ @property
+ def n_nearest_criteria(self):
+ """Number of other criteria to use to estimate the missing values of \
+ each criteria column."""
+ return self._n_nearest_criteria
+
+ @property
+ def initial_strategy(self):
+ """Which strategy to use to initialize the missing values."""
+ return self._initial_strategy
+
+ @property
+ def imputation_order(self):
+ """The order in which the criteria will be imputed."""
+ return self._imputation_order
+
+ @property
+ def min_value(self):
+ """Minimum possible imputed value."""
+ return self._min_value
+
+ @property
+ def max_value(self):
+ """Maximum possible imputed value."""
+ return self._max_value
+
+ @property
+ def verbose(self):
+ """Verbosity flag, controls the debug messages that are issued as \
+ functions are evaluated."""
+ return self._verbose
+
+ @property
+ def random_state(self):
+ """The seed of the pseudo random number generator to use."""
+ return self._random_state
+
+ # THE IMPUTATION LOGIC ====================================================
+
+ @doc_inherit(SKCImputerABC._impute)
+ def _impute(self, matrix):
+
+ imputer = _sklimpute.IterativeImputer(
+ estimator=self._estimator,
+ missing_values=self._missing_values,
+ sample_posterior=self._sample_posterior,
+ max_iter=self._max_iter,
+ tol=self._tol,
+ n_nearest_features=self._n_nearest_criteria,
+ initial_strategy=self._initial_strategy,
+ imputation_order=self._imputation_order,
+            skip_complete=self._skip_complete,
+ min_value=self._min_value,
+ max_value=self._max_value,
+ verbose=self._verbose,
+ random_state=self._random_state,
+ )
+ imputed_matrix = imputer.fit_transform(matrix)
+ return imputed_matrix
+
+
+# =============================================================================
+# KNN
+# =============================================================================
+
+
+class KNNImputer(SKCImputerABC):
+ """Imputation for completing missing values using k-Nearest Neighbors.
+
+ Internally this class uses the ``sklearn.impute.KNNImputer`` class.
+
+ Each sample's missing values are imputed using the mean value from
+ `n_neighbors` nearest neighbors found in the training set.
+ Two samples are close if the criteria that neither is missing are close.
+
+ Parameters
+ ----------
+ missing_values : int, float, str, np.nan or None, default=np.nan
+ The placeholder for the missing values. All occurrences of
+ `missing_values` will be imputed.
+
+ n_neighbors : int, default=5
+ Number of neighboring samples to use for imputation.
+
+ weights : {'uniform', 'distance'} or callable, default='uniform'
+ Weight function used in prediction. Possible values:
+
+ - `'uniform'`: uniform weights. All points in each neighborhood are
+ weighted equally.
+ - `'distance'`: weight points by the inverse of their distance.
+          In this case, closer neighbors of a query point will have a
+ greater influence than neighbors which are further away.
+ - callable: a user-defined function which accepts an
+ array of distances, and returns an array of the same shape
+ containing the weights.
+
+ metric : {'nan_euclidean'} or callable, default='nan_euclidean'
+ Distance metric for searching neighbors. Possible values:
+
+ - 'nan_euclidean'
+ - callable : a user-defined function which conforms to the definition
+ of ``_pairwise_callable(X, Y, metric, **kwds)``. The function
+ accepts two arrays, X and Y, and a `missing_values` keyword in
+ `kwds` and returns a scalar distance value.
+
+ """
+
+ _skcriteria_parameters = [
+ "missing_values",
+ "n_neighbors",
+ "weights",
+ "metric",
+ ]
+
+ def __init__(
+ self,
+ *,
+ missing_values=np.nan,
+ n_neighbors=5,
+ weights="uniform",
+ metric="nan_euclidean",
+ ):
+ self._missing_values = missing_values
+ self._n_neighbors = n_neighbors
+ self._weights = weights
+ self._metric = metric
+
+ # PROPERTIES ==============================================================
+
+ @property
+ def missing_values(self):
+ """The placeholder for the missing values."""
+ return self._missing_values
+
+ @property
+ def n_neighbors(self):
+ """Number of neighboring samples to use for imputation."""
+ return self._n_neighbors
+
+ @property
+ def weights(self):
+ """Weight function used in prediction."""
+ return self._weights
+
+ @property
+ def metric(self):
+ """Distance metric for searching neighbors."""
+ return self._metric
+
+ # THE IMPUTATION LOGIC ====================================================
+
+ @doc_inherit(SKCImputerABC._impute)
+ def _impute(self, matrix):
+ imputer = _sklimpute.KNNImputer(
+ missing_values=self._missing_values,
+ n_neighbors=self._n_neighbors,
+ weights=self._weights,
+ metric=self._metric,
+ )
+ imputed_matrix = imputer.fit_transform(matrix)
+ return imputed_matrix
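
Review note (illustrative, not part of the patch): a minimal sketch of the
new imputers over a decision matrix with missing entries, assuming
``skcriteria.mkdm`` accepts NaN values in the matrix:

    >>> import numpy as np
    >>> import skcriteria as skc
    >>> from skcriteria.preprocessing.impute import SimpleImputer, KNNImputer
    >>> dm = skc.mkdm(
    ...     matrix=[[1.0, 2.0], [np.nan, 4.0], [3.0, np.nan]],
    ...     objectives=[max, max],
    ... )
    >>> # replace every NaN with the mean of its criterion column
    >>> imputed = SimpleImputer(strategy="mean").transform(dm)
    >>> # or with the mean of the two most similar alternatives
    >>> imputed_knn = KNNImputer(n_neighbors=2).transform(dm)
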
diff --git a/skcriteria/preprocessing/increment.py b/skcriteria/preprocessing/increment.py
index d3410a8..ce1b949 100644
--- a/skcriteria/preprocessing/increment.py
+++ b/skcriteria/preprocessing/increment.py
@@ -24,7 +24,7 @@
import numpy as np
-from ..core import SKCMatrixAndWeightTransformerABC
+from ._preprocessing_base import SKCMatrixAndWeightTransformerABC
from ..utils import doc_inherit
# =============================================================================
diff --git a/skcriteria/preprocessing/invert_objectives.py b/skcriteria/preprocessing/invert_objectives.py
index cd6585d..30eeee5 100644
--- a/skcriteria/preprocessing/invert_objectives.py
+++ b/skcriteria/preprocessing/invert_objectives.py
@@ -19,7 +19,8 @@
import numpy as np
-from ..core import Objective, SKCTransformerABC
+from ._preprocessing_base import SKCTransformerABC
+from ..core import Objective
from ..utils import deprecated, doc_inherit
@@ -133,7 +134,10 @@ def _invert(self, matrix, minimize_mask):
# DEPRECATED
# =============================================================================
@deprecated(
- reason="Use 'skcriteria.preprocessing.InvertMinimize' instead",
+ reason=(
+ "Use ``skcriteria.preprocessing.invert_objectives.InvertMinimize`` "
+ "instead"
+ ),
version=0.7,
)
class MinimizeToMaximize(InvertMinimize):
diff --git a/skcriteria/preprocessing/push_negatives.py b/skcriteria/preprocessing/push_negatives.py
index 763a8fd..81ecb27 100644
--- a/skcriteria/preprocessing/push_negatives.py
+++ b/skcriteria/preprocessing/push_negatives.py
@@ -22,7 +22,7 @@
import numpy as np
-from ..core import SKCMatrixAndWeightTransformerABC
+from ._preprocessing_base import SKCMatrixAndWeightTransformerABC
from ..utils import doc_inherit
# =============================================================================
diff --git a/skcriteria/preprocessing/scalers.py b/skcriteria/preprocessing/scalers.py
index 03e6c4f..124c115 100644
--- a/skcriteria/preprocessing/scalers.py
+++ b/skcriteria/preprocessing/scalers.py
@@ -9,7 +9,7 @@
# DOCS
# =============================================================================
-"""Functionalities for scale values based on differrent strategies.
+"""Functionalities for scale values based on different strategies.
In addition to the Transformers, a collection of an MCDA agnostic functions
are offered to scale an array along an arbitrary axis.
@@ -25,83 +25,217 @@
import numpy as np
from numpy import linalg
-from ..core import SKCMatrixAndWeightTransformerABC
-from ..utils import doc_inherit
+from sklearn import preprocessing as _sklpreproc
+
+from ._preprocessing_base import (
+ SKCMatrixAndWeightTransformerABC,
+ SKCTransformerABC,
+)
+from ..core import Objective
+from ..utils import deprecated, doc_inherit
+
+
+# =============================================================================
+# HELPER FUNCTION
+# =============================================================================
+
+
+def _run_sklearn_scaler(mtx_or_weights, scaler):
+ """Runs sklearn scalers against 1D (weights) or 2D (alternatives) \
+ arrays.
+
+ This function is in charge of verifying if the array provided has adequate
+ dimensions to work with the scikit-learn scalers.
+
+ It also ensures that the output has the same input dimensions.
+
+ """
+ ndims = np.ndim(mtx_or_weights)
+    if ndims == 1:  # a weights vector: reshape into a single-column matrix
+ mtx_or_weights = mtx_or_weights.reshape(len(mtx_or_weights), 1)
+ result = scaler.fit_transform(mtx_or_weights)
+ if ndims == 1:
+ result = result.flatten()
+ return result
+
# =============================================================================
# STANDAR SCALER
# =============================================================================
-def scale_by_stdscore(arr, axis=None):
- r"""Standardize the values by removing the mean and divided by the std-dev.
+class StandarScaler(SKCMatrixAndWeightTransformerABC):
+ """Standardize the dm by removing the mean and scaling to unit variance.
The standard score of a sample `x` is calculated as:
- .. math::
+ z = (x - u) / s
+
+ where `u` is the mean of the values, and `s` is the standard deviation
+ of the training samples or one if `with_std=False`.
- z = (x - \mu) / \sigma
+    This is a thin wrapper around ``sklearn.preprocessing.StandardScaler``.
Parameters
----------
- arr: :py:class:`numpy.ndarray` like.
- A array with values
- axis : :py:class:`int` optional
- Axis along which to operate. By default, flattened input is used.
+ with_mean : bool, default=True
+ If True, center the data before scaling.
- Returns
- -------
- :py:class:`numpy.ndarray`
- array of ratios
+ with_std : bool, default=True
+ If True, scale the data to unit variance (or equivalently, unit
+ standard deviation).
- Examples
- --------
- .. code-block:: pycon
+ """
- >>> from skcriteria.preprocess import scale_by_stdscore
- >>> mtx = [[1, 2], [3, 4]]
+ _skcriteria_parameters = ["target", "with_mean", "with_std"]
+
+ def __init__(self, target, *, with_mean=True, with_std=True):
+ super().__init__(target)
+ self._with_mean = bool(with_mean)
+ self._with_std = bool(with_std)
+
+ @property
+ def with_mean(self):
+ """True if the features will be center before scaling."""
+ return self._with_mean
+
+ @property
+ def with_std(self):
+ """True if the features will be scaled to the unit variance."""
+ return self._with_std
+
+ def _get_scaler(self):
+ return _sklpreproc.StandardScaler(
+ with_mean=self.with_mean,
+ with_std=self.with_std,
+ )
+
+ @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights)
+ def _transform_weights(self, weights):
+ scaler = self._get_scaler()
+ return _run_sklearn_scaler(weights, scaler)
+
+ @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix)
+ def _transform_matrix(self, matrix):
+ scaler = self._get_scaler()
+ return _run_sklearn_scaler(matrix, scaler)
- # ratios with the max value of the array
- >>> scale_by_stdscore(mtx)
- array([[-1.34164079, -0.4472136 ],
- [ 0.4472136 , 1.34164079]])
- # ratios with the max value of the arr by column
- >>> scale_by_stdscore(mtx, axis=0)
- array([[-1., -1.],
- [ 1., 1.]])
+# =============================================================================
+# MINMAX
+# =============================================================================
- # ratios with the max value of the array by row
- >>> scale_by_stdscore(mtx, axis=1)
- array([[-1., 1.],
- [-1., 1.]])
+
+class MinMaxScaler(SKCMatrixAndWeightTransformerABC):
+ r"""Scaler based on the range.
+
+ The matrix transformation is given by::
+
+ X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
+ X_scaled = X_std * (max - min) + min
+
+ And the weight transformation::
+
+ X_std = (X - X.min(axis=None)) / (X.max(axis=None) - X.min(axis=None))
+ X_scaled = X_std * (max - min) + min
+
+    If the scaler is configured to work with 'matrix', each value
+    of each criteria is divided by the range of that criteria.
+    On the other hand, if it is configured to work with 'weights',
+    each weight is divided by the range of the weights.
+
+ This is a thin wrapper around ``sklearn.preprocessing.MinMaxScaler``.
+
+ Parameters
+ ----------
+ criteria_range : tuple (min, max), default=(0, 1)
+ Desired range of transformed data.
+
+ clip : bool, default=False
+ Set to True to clip transformed values of held-out data to
+ provided `criteria_range`.
"""
- arr = np.asarray(arr, dtype=float)
- mean = np.mean(arr, axis=axis, keepdims=True)
- std = np.std(arr, axis=axis, keepdims=True)
- return (arr - mean) / std
+ _skcriteria_parameters = ["target", "clip", "criteria_range"]
-class StandarScaler(SKCMatrixAndWeightTransformerABC):
- """Standardize the dm by removing the mean and scaling to unit variance.
+ def __init__(self, target, *, clip=False, criteria_range=(0, 1)):
+ super().__init__(target)
+ self._clip = bool(clip)
+ self._cr_min, self._cr_max = map(float, criteria_range)
- The standard score of a sample `x` is calculated as:
+ @property
+ def clip(self):
+ """True if the transformed values will be clipped to held-out the \
+ value provided `criteria_range`."""
+ return self._clip
- z = (x - u) / s
+ @property
+ def criteria_range(self):
+ """Range of transformed data."""
+ return (self._cr_min, self._cr_max)
- where `u` is the mean of the values, and `s` is the standard deviation
- of the training samples or one if `with_std=False`.
+ def _get_scaler(self):
+ return _sklpreproc.MinMaxScaler(
+ clip=self.clip,
+ feature_range=self.criteria_range,
+ )
+
+ @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights)
+ def _transform_weights(self, weights):
+ scaler = self._get_scaler()
+ return _run_sklearn_scaler(weights, scaler)
+
+ @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix)
+ def _transform_matrix(self, matrix):
+ scaler = self._get_scaler()
+ return _run_sklearn_scaler(matrix, scaler)
+
+
+# =============================================================================
+# MAX
+# =============================================================================
+
+
+class MaxAbsScaler(SKCMatrixAndWeightTransformerABC):
+ r"""Scaler based on the maximum values.
+
+    If the scaler is configured to work with 'matrix', each value
+    of each criteria is divided by the maximum value of that criteria.
+    On the other hand, if it is configured to work with 'weights',
+    each weight is divided by the maximum value of the weights.
+
+ This estimator scales and translates each criteria individually such that
+ the maximal absolute value of each criteria in the training set will be
+ 1.0. It does not shift/center the data, and thus does not destroy any
+ sparsity.
+
+ This is a thin wrapper around ``sklearn.preprocessing.MaxAbsScaler``.
"""
@doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights)
def _transform_weights(self, weights):
- return scale_by_stdscore(weights, axis=None)
+ scaler = _sklpreproc.MaxAbsScaler()
+ return _run_sklearn_scaler(weights, scaler)
@doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix)
def _transform_matrix(self, matrix):
- return scale_by_stdscore(matrix, axis=0)
+ scaler = _sklpreproc.MaxAbsScaler()
+ return _run_sklearn_scaler(matrix, scaler)
+
+
+@deprecated(
+ reason="Use ``skcriteria.preprocessing.scalers.MaxAbsScaler`` instead",
+ version=0.8,
+)
+class MaxScaler(MaxAbsScaler):
+ r"""Scaler based on the maximum values.
+
+    From scikit-criteria >= 0.8 this is a thin wrapper around
+    ``skcriteria.preprocessing.scalers.MaxAbsScaler``.
+
+ """
# =============================================================================
@@ -187,88 +321,6 @@ def _transform_matrix(self, matrix):
return scale_by_vector(matrix, axis=0)
-# =============================================================================
-# MINMAX
-# =============================================================================
-
-
-def scale_by_minmax(arr, axis=None):
- r"""Fraction of the range normalizer.
-
- Subtracts to each value of the array the minimum and then divides
- it by the total range.
-
- .. math::
-
- \overline{X}_{ij} =
- \frac{X_{ij} - \min{X_{ij}}}{\max_{X_{ij}} - \min_{X_{ij}}}
-
- Parameters
- ----------
- arr: :py:class:`numpy.ndarray` like.
- A array with values
- axis : :py:class:`int` optional
- Axis along which to operate. By default, flattened input is used.
-
- Returns
- -------
- :py:class:`numpy.ndarray`
- array of ratios
-
-
- Examples
- --------
- .. code-block:: pycon
-
- >>> from skcriteria.preprocess import scale_by_minmax
- >>> mtx = [[1, 2], [3, 4]]
-
- # ratios with the range of the array
- >>> scale_by_minmax(mtx)
- array([[0. , 0.33333333],
- [0.66666667, 1. ]])
-
- # ratios with the range by column
- >>> scale_by_minmax(mtx, axis=0)
- array([[0., 0.],
- [1., 1.]])
-
- # ratios with the range by row
- >>> scale_by_minmax(mtx, axis=1)
- array([[0., 1.],
- [0., 1.]])
-
- """
- arr = np.asarray(arr, dtype=float)
- minval = np.min(arr, axis=axis, keepdims=True)
- maxval = np.max(arr, axis=axis, keepdims=True)
- return (arr - minval) / (maxval - minval)
-
-
-class MinMaxScaler(SKCMatrixAndWeightTransformerABC):
- r"""Scaler based on the range.
-
- .. math::
-
- \overline{X}_{ij} =
- \frac{X_{ij} - \min{X_{ij}}}{\max_{X_{ij}} - \min_{X_{ij}}}
-
- If the scaler is configured to work with 'matrix' each value
- of each criteria is divided by the range of that criteria.
- In other hand if is configure to work with 'weights',
- each value of weight is divided by the range the weights.
-
- """
-
- @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights)
- def _transform_weights(self, weights):
- return scale_by_minmax(weights, axis=None)
-
- @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix)
- def _transform_matrix(self, matrix):
- return scale_by_minmax(matrix, axis=0)
-
-
# =============================================================================
# SUM
# =============================================================================
@@ -345,75 +397,77 @@ def _transform_matrix(self, matrix):
# =============================================================================
-# MAX
+# CENIT DISTANCE
# =============================================================================
-def scale_by_max(arr, axis=None):
- r"""Divide of every value on the array by max value along an axis.
+def matrix_scale_by_cenit_distance(matrix, objectives):
+ r"""Calculate a scores with respect to an ideal and anti-ideal alternative.
- .. math::
+ For every criterion :math:`f` of this multicriteria problem we define a
+ membership function :math:`x_j` mapping the values of :math:`f_j` to the
+ interval [0, 1].
- \overline{X}_{ij} = \frac{X_{ij}}{\max_{X_{ij}}}
+    The resulting score :math:`x_{aj}` expresses the degree to which the
+    alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which
+    is the best performance in criterion :math:`j`, and far from the
+    anti-ideal value :math:`f_{j^*}`, which is the worst performance in
+    criterion :math:`j`. Both the ideal and the anti-ideal are achieved by
+    at least one of the alternatives under consideration.
- Parameters
- ----------
- arr: :py:class:`numpy.ndarray` like.
- A array with values
- axis : :py:class:`int` optional
- Axis along which to operate. By default, flattened input is used.
+ .. math::
- Returns
- -------
- :py:class:`numpy.ndarray`
- array of ratios
+ x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}}
- Examples
- --------
- .. code-block:: pycon
+ """
+ matrix = np.asarray(matrix, dtype=float)
- >>> from skcriteria.preprocess import scale_by_max
- >>> mtx = [[1, 2], [3, 4]]
+ maxs = np.max(matrix, axis=0)
+ mins = np.min(matrix, axis=0)
- # ratios with the max value of the array
- >>> scale_by_max(mtx)
- array([[ 0.25, 0.5 ],
- [ 0.75, 1. ]])
+ where_max = np.equal(objectives, Objective.MAX.value)
- # ratios with the max value of the arr by column
- >>> scale_by_max(mtx, axis=0)
- array([[ 0.33333334, 0.5],
- [ 1. , 1. ]])
+ cenit = np.where(where_max, maxs, mins)
+ nadir = np.where(where_max, mins, maxs)
- # ratios with the max value of the array by row
- >>> scale_by_max(mtx, axis=1)
- array([[ 0.5 , 1.],
- [ 0.75, 1.]])
+ return (matrix - nadir) / (cenit - nadir)
- """
- arr = np.asarray(arr, dtype=float)
- maxval = np.max(arr, axis=axis, keepdims=True)
- return arr / maxval
+class CenitDistanceMatrixScaler(SKCTransformerABC):
+ r"""Relative scores with respect to an ideal and anti-ideal alternative.
-class MaxScaler(SKCMatrixAndWeightTransformerABC):
- r"""Scaler based on the maximum values.
+ For every criterion :math:`f` of this multicriteria problem we define a
+ membership function :math:`x_j` mapping the values of :math:`f_j` to the
+ interval [0, 1].
+
+    The resulting score :math:`x_{aj}` expresses the degree to which the
+    alternative :math:`a` is close to the ideal value :math:`f_{j}^*`, which
+    is the best performance in criterion :math:`j`, and far from the
+    anti-ideal value :math:`f_{j^*}`, which is the worst performance in
+    criterion :math:`j`. Both the ideal and the anti-ideal are achieved by
+    at least one of the alternatives under consideration.
.. math::
- \overline{X}_{ij} = \frac{X_{ij}}{\max_{X_{ij}}}
+ x_{aj} = \frac{f_j(a) - f_{j^*}}{f_{j}^* - f_{j^*}}
- If the scaler is configured to work with 'matrix' each value
- of each criteria is divided by the maximum value of that criteria.
- In other hand if is configure to work with 'weights',
- each value of weight is divided by the maximum value the weights.
+
+ References
+ ----------
+ :cite:p:`diakoulaki1995determining`
"""
- @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_weights)
- def _transform_weights(self, weights):
- return scale_by_max(weights, axis=None)
+ _skcriteria_parameters = []
- @doc_inherit(SKCMatrixAndWeightTransformerABC._transform_matrix)
- def _transform_matrix(self, matrix):
- return scale_by_max(matrix, axis=0)
+ @doc_inherit(SKCTransformerABC._transform_data)
+ def _transform_data(self, matrix, objectives, **kwargs):
+
+ distance_mtx = matrix_scale_by_cenit_distance(matrix, objectives)
+
+ dtypes = np.full(np.shape(objectives), float)
+
+ kwargs.update(
+ matrix=distance_mtx, objectives=objectives, dtypes=dtypes
+ )
+ return kwargs
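
Review note (illustrative, not part of the patch): the sklearn-backed
scalers keep the ``target`` switch from the base class, so one parameter
decides whether the matrix, the weights, or both get scaled:

    >>> import skcriteria as skc
    >>> from skcriteria.preprocessing.scalers import MinMaxScaler, StandarScaler
    >>> dm = skc.mkdm(
    ...     matrix=[[1.0, 10.0], [3.0, 20.0]],
    ...     objectives=[max, max],
    ...     weights=[1.0, 3.0],
    ... )
    >>> only_mtx = MinMaxScaler(target="matrix").transform(dm)
    >>> both = StandarScaler(target="both").transform(dm)
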
diff --git a/skcriteria/preprocessing/weighters.py b/skcriteria/preprocessing/weighters.py
index a280a9d..0c4371d 100644
--- a/skcriteria/preprocessing/weighters.py
+++ b/skcriteria/preprocessing/weighters.py
@@ -28,10 +28,14 @@
import scipy.stats
+from ._preprocessing_base import SKCTransformerABC
+from .scalers import matrix_scale_by_cenit_distance
+from ..core import Objective
+from ..utils import deprecated, doc_inherit
-from .distance import cenit_distance
-from ..core import Objective, SKCTransformerABC
-from ..utils import doc_inherit
+# =============================================================================
+# BASE CLASS
+# =============================================================================
class SKCWeighterABC(SKCTransformerABC):
@@ -159,7 +163,7 @@ def std_weights(matrix):
.. math::
- w_j = \frac{base\_value}{m}
+    w_j = \frac{s_j}{\sum_{k=1}^{m} s_k}
Where $m$ is the number os columns/criteria in matrix.
@@ -184,7 +188,7 @@ def std_weights(matrix):
array([0.5, 0.5])
"""
- std = np.std(matrix, axis=0)
+ std = np.std(matrix, axis=0, ddof=1)
return std / np.sum(std)
@@ -235,7 +239,7 @@ class EntropyWeighter(SKCWeighterABC):
It uses the underlying ``scipy.stats.entropy`` function which assumes that
the values of the criteria are probabilities of a distribution.
- This transformer will normalize the criteria if they don’t sum to 1.
+ This transformer will normalize the criteria if they don't sum to 1.
See Also
--------
@@ -256,6 +260,10 @@ def _weight_matrix(self, matrix, **kwargs):
# =============================================================================
+@deprecated(
+ reason="Please use ``pd.DataFrame(arr.T).correlation('pearson')``",
+ version=0.8,
+)
def pearson_correlation(arr):
"""Return Pearson product-moment correlation coefficients.
@@ -282,6 +290,10 @@ def pearson_correlation(arr):
return np.corrcoef(arr)
+@deprecated(
+ reason="Please use ``pd.DataFrame(arr.T).correlation('spearman')``",
+ version=0.8,
+)
def spearman_correlation(arr):
"""Calculate a Spearman correlation coefficient.
@@ -308,22 +320,25 @@ def spearman_correlation(arr):
return scipy.stats.spearmanr(arr.T, axis=0).correlation
-def critic_weights(
- matrix, objectives, correlation=pearson_correlation, scale=True
-):
+def critic_weights(matrix, objectives, correlation="pearson", scale=True):
"""Execute the CRITIC method without any validation."""
matrix = np.asarray(matrix, dtype=float)
- matrix = cenit_distance(matrix, objectives=objectives) if scale else matrix
+ matrix = (
+ matrix_scale_by_cenit_distance(matrix, objectives=objectives)
+ if scale
+ else matrix
+ )
dindex = np.std(matrix, axis=0)
+    import pandas as pd  # deferred import; pandas is already a dependency
- corr_m1 = 1 - correlation(matrix.T)
+ corr_m1 = 1 - pd.DataFrame(matrix).corr(method=correlation).to_numpy()
uweights = dindex * np.sum(corr_m1, axis=0)
weights = uweights / np.sum(uweights)
return weights
-class Critic(SKCWeighterABC):
+class CRITIC(SKCWeighterABC):
"""CRITIC (CRiteria Importance Through Intercriteria Correlation).
The method aims at the determination of objective weights of relative
@@ -333,19 +348,21 @@ class Critic(SKCWeighterABC):
Parameters
----------
- correlation: str ["pearson" or "spearman"] or callable. (default "pearson")
+ correlation: str ["pearson", "spearman", "kendall"] or callable.
This is the correlation function used to evaluate the discordance
between two criteria. In other words, what conflict does one criterion
a criterion with respect to the decision made by the other criteria.
- By default the ``pearson`` correlation is used, and the ``kendall``
- correlation is also available implemented.
- It is also possible to provide a function that receives as a single
- parameter, the matrix of alternatives, and returns the correlation
- matrix.
+    By default the ``pearson`` correlation is used; the ``spearman``
+    and ``kendall`` correlations are also available.
+    It is also possible to provide a callable that takes two 1D arrays
+    as input and returns a float. Note that the returned correlation
+    matrix will have 1 along the diagonal and will be symmetric
+    regardless of the callable's behavior.
+
scale: bool (default ``True``)
True if it is necessary to scale the data with
- ``skcriteria.preprocesisng.cenit_distance`` prior to calculating the
- correlation
+    ``skcriteria.preprocessing.scalers.matrix_scale_by_cenit_distance``
+    prior to calculating the correlation.
Warnings
--------
@@ -360,19 +377,15 @@ class Critic(SKCWeighterABC):
"""
- CORRELATION = {
- "pearson": pearson_correlation,
- "spearman": spearman_correlation,
- }
+ CORRELATION = ("pearson", "spearman", "kendall")
_skcriteria_parameters = ["correlation", "scale"]
def __init__(self, correlation="pearson", scale=True):
- correlation_func = self.CORRELATION.get(correlation, correlation)
- if not callable(correlation_func):
+ if not (correlation in self.CORRELATION or callable(correlation)):
corr_keys = ", ".join(f"'{c}'" for c in self.CORRELATION)
- raise ValueError(f"Correlation must be {corr_keys} or callable")
- self._correlation = correlation_func
+ raise ValueError(f"Correlation must be {corr_keys} or a callable")
+ self._correlation = correlation
self._scale = bool(scale)
@@ -398,3 +411,12 @@ def _weight_matrix(self, matrix, objectives, **kwargs):
return critic_weights(
matrix, objectives, correlation=self.correlation, scale=self.scale
)
+
+
+@deprecated(
+ reason="Use ``skcriteria.preprocessing.weighters.CRITIC`` instead",
+ version=0.8,
+)
+@doc_inherit(CRITIC, warn_class=False)
+class Critic(CRITIC):
+ ...
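
Review note (illustrative, not part of the patch): with the string-based
``correlation`` parameter, CRITIC now delegates the correlation matrix to
pandas. A minimal sketch with arbitrarily chosen values:

    >>> import skcriteria as skc
    >>> from skcriteria.preprocessing.weighters import CRITIC
    >>> dm = skc.mkdm(
    ...     matrix=[[1.0, 9.0, 3.0], [4.0, 5.0, 6.0], [7.0, 2.0, 9.5]],
    ...     objectives=[max, min, max],
    ... )
    >>> weighted = CRITIC(correlation="spearman").transform(dm)
    >>> round(float(weighted.weights.sum()), 6)  # CRITIC normalizes weights
    1.0
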
diff --git a/skcriteria/utils/__init__.py b/skcriteria/utils/__init__.py
index 5d955a9..83575d6 100644
--- a/skcriteria/utils/__init__.py
+++ b/skcriteria/utils/__init__.py
@@ -18,7 +18,11 @@
from . import lp, rank
from .accabc import AccessorABC
from .bunch import Bunch
-from .decorators import deprecated, doc_inherit
+from .cmanagers import df_temporal_header
+from .deprecate import deprecated, will_change
+from .doctools import doc_inherit
+from .unames import unique_names
+
# =============================================================================
# ALL
@@ -28,8 +32,10 @@
"AccessorABC",
"doc_inherit",
"deprecated",
+ "df_temporal_header",
"rank",
"Bunch",
"lp",
- "dominance",
+ "unique_names",
+ "will_change",
]
diff --git a/skcriteria/utils/bunch.py b/skcriteria/utils/bunch.py
index 8396ba1..62a7ba1 100644
--- a/skcriteria/utils/bunch.py
+++ b/skcriteria/utils/bunch.py
@@ -16,8 +16,10 @@
# IMPORTS
# =============================================================================
+import copy
from collections.abc import Mapping
+
# =============================================================================
# DOC INHERITANCE
# =============================================================================
@@ -61,10 +63,32 @@ def __getitem__(self, k):
def __getattr__(self, a):
"""x.__getattr__(y) <==> x.y."""
try:
- return self[a]
+ return self._data[a]
except KeyError:
raise AttributeError(a)
+ def __copy__(self):
+ """x.__copy__() <==> copy.copy(x)."""
+ cls = type(self)
+ return cls(str(self._name), data=self._data)
+
+ def __deepcopy__(self, memo):
+ """x.__deepcopy__() <==> copy.copy(x)."""
+ # extract the class
+ cls = type(self)
+
+ # make the copy but without the data
+ clone = cls(name=str(self._name), data=None)
+
+ # store in the memo that clone is copy of self
+ # https://docs.python.org/3/library/copy.html
+ memo[id(self)] = clone
+
+ # now we copy the data
+ clone._data = copy.deepcopy(self._data, memo)
+
+ return clone
+
def __iter__(self):
"""x.__iter__() <==> iter(x)."""
return iter(self._data)
@@ -76,7 +100,7 @@ def __len__(self):
def __repr__(self):
"""x.__repr__() <==> repr(x)."""
content = repr(set(self._data)) if self._data else "{}"
- return f"{self._name}({content})"
+ return f"<{self._name} {content}>"
def __dir__(self):
"""x.__dir__() <==> dir(x)."""
diff --git a/skcriteria/utils/cmanagers.py b/skcriteria/utils/cmanagers.py
new file mode 100644
index 0000000..7a1684d
--- /dev/null
+++ b/skcriteria/utils/cmanagers.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Multiple context managers to use inside scikit-criteria."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+import contextlib
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+
+@contextlib.contextmanager
+def df_temporal_header(df, header, name=None):
+ """Temporarily replaces a DataFrame columns names.
+
+ Optionally also assign another name to the columns.
+
+ Parameters
+ ----------
+    df : pandas.DataFrame
+        The DataFrame whose columns will be temporarily renamed.
+    header : sequence
+ The new names of the columns.
+ name : str or None (default None)
+ New name for the index containing the columns in the DataFrame. If
+ 'None' the original name of the columns present in the DataFrame is
+ preserved.
+
+ """
+ original_header = df.columns
+ original_name = original_header.name
+
+ name = original_name if name is None else name
+ try:
+ df.columns = header
+ df.columns.name = name
+ yield df
+ finally:
+ df.columns = original_header
+ df.columns.name = original_name
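
Review note (illustrative, not part of the patch): the context manager in
action; the original header is restored on exit, even if the block raises:

    >>> import pandas as pd
    >>> from skcriteria.utils import df_temporal_header
    >>> df = pd.DataFrame({"a": [1], "b": [2]})
    >>> with df_temporal_header(df, ["x", "y"], name="renamed") as hdf:
    ...     list(hdf.columns)
    ['x', 'y']
    >>> list(df.columns)
    ['a', 'b']
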
diff --git a/skcriteria/utils/deprecate.py b/skcriteria/utils/deprecate.py
new file mode 100644
index 0000000..28c0561
--- /dev/null
+++ b/skcriteria/utils/deprecate.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Multiple decorator to use inside scikit-criteria."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+from deprecated import deprecated as _deprecated
+
+
+# =============================================================================
+# CONSTANTS
+# =============================================================================
+
+# If the version of the warning is >= ERROR_GE, the action is set to
+# 'error'; otherwise it is 'once'.
+ERROR_GE = 1.0
+
+# =============================================================================
+# WARNINGS
+# =============================================================================
+
+
+class SKCriteriaDeprecationWarning(DeprecationWarning):
+ """Skcriteria deprecation warning."""
+
+
+class SKCriteriaFutureWarning(FutureWarning):
+ """Skcriteria future warning."""
+
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+DEPRECATION_DIRECTIVE = """
+{indent}.. deprecated:: {version}
+{indent} {reason}
+"""
+
+
+def _create_doc_with_deprecated_directive(text, *, reason, version):
+    # first split the text into lines
+ lines = text.splitlines()
+
+    # "location" is the index between lines where we must insert the
+    # deprecation directive. By default, at the end.
+ location = len(lines)
+
+    # "indentation" is how far from the margin (in number of spaces) we
+    # must insert the directive. By default no indentation is required.
+ indentation = ""
+
+ # we iterate line by line
+ for idx, line in enumerate(lines):
+
+ line_stripped = line.strip()
+
+ # if we found a line full of "-" is a underline of the first section
+ # in numpy format.
+ # check: https://numpydoc.readthedocs.io/en/latest/format.html
+ if line_stripped and line_stripped.replace("-", "") == "":
+
+            # the location of the directive is one line above the first
+            # section title
+ location = idx - 2
+
+            # and the indentation is the number of white spaces on the left
+ indentation = " " * (len(line) - len(line.lstrip()))
+
+ break
+
+ # we create the directive here
+ directive = DEPRECATION_DIRECTIVE.format(
+ reason=reason, version=version, indent=indentation
+ )
+
+ # we insert the directive in the correct location
+ lines.insert(location, directive)
+
+    # recreate the docstring with the directive inserted
+ new_doc = "\n".join(lines)
+ return new_doc
+
+
+# =============================================================================
+# DECORATORS
+# =============================================================================
+
+
+def deprecated(*, reason, version):
+ """Mark functions, classes and methods as deprecated.
+
+ It will result in a warning being emitted when the object is called,
+ and the "deprecated" directive was added to the docstring.
+
+ Parameters
+ ----------
+ reason: str
+ Reason message which documents the deprecation in your library.
+ version: str
+ Version of your project which deprecates this feature.
+        If you follow the `Semantic Versioning <https://semver.org/>`_,
+ the version number has the format "MAJOR.MINOR.PATCH".
+
+ Notes
+ -----
+ This decorator is a thin layer over
+ :py:func:`deprecated.deprecated`.
+
+    Check: `Deprecated <https://pypi.org/project/Deprecated/>`__
+
+ """
+ add_warning = _deprecated(
+ reason=reason,
+ version=version,
+ category=SKCriteriaDeprecationWarning,
+ action=("error" if version >= ERROR_GE else "once"),
+ )
+
+ def _dec(func):
+ decorated_func = add_warning(func)
+ decorated_func.__doc__ = _create_doc_with_deprecated_directive(
+ func.__doc__, reason=reason, version=version
+ )
+ return decorated_func
+
+ return _dec
+
+
+def will_change(*, reason, version):
+ """Mark functions, classes and methods as "to be changed".
+
+ It will result in a warning being emitted when the object is called,
+ and the "deprecated" directive was added to the docstring.
+
+ Parameters
+ ----------
+ reason: str
+ Reason message which documents the "to be changed" in your library.
+ version: str
+ Version of your project which marks as this feature.
+        If you follow the `Semantic Versioning <https://semver.org/>`_,
+ the version number has the format "MAJOR.MINOR.PATCH".
+
+ Notes
+ -----
+ This decorator is a thin layer over
+ :py:func:`deprecated.deprecated`.
+
+    Check: `Deprecated <https://pypi.org/project/Deprecated/>`__
+
+ """
+ add_warning = _deprecated(
+ reason=reason,
+ version=version,
+ category=SKCriteriaFutureWarning,
+ action="once",
+ )
+
+ def _dec(func):
+ decorated_func = add_warning(func)
+ decorated_func.__doc__ = _create_doc_with_deprecated_directive(
+ func.__doc__, reason=reason, version=version
+ )
+ return decorated_func
+
+ return _dec
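
Review note (illustrative, not part of the patch): a sketch of the
decorator's documented side effect. The ``.. deprecated::`` directive is
appended to the wrapped object's docstring, so the function must have one:

    >>> from skcriteria.utils.deprecate import deprecated
    >>> @deprecated(reason="Use ``new_func`` instead.", version=0.9)
    ... def old_func():
    ...     """Do something the old way."""
    >>> ".. deprecated:: 0.9" in old_func.__doc__
    True
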
diff --git a/skcriteria/utils/decorators.py b/skcriteria/utils/doctools.py
similarity index 59%
rename from skcriteria/utils/decorators.py
rename to skcriteria/utils/doctools.py
index 27db58a..78b4981 100644
--- a/skcriteria/utils/decorators.py
+++ b/skcriteria/utils/doctools.py
@@ -20,8 +20,6 @@
from custom_inherit import doc_inherit as _doc_inherit
-from deprecated import deprecated as _deprecated
-
# =============================================================================
# DOC INHERITANCE
# =============================================================================
@@ -39,7 +37,7 @@ def doc_inherit(parent, warn_class=True):
parent : Union[str, Any]
The docstring, or object of which the docstring is utilized as the
parent docstring during the docstring merge.
- warn_class: bool
+ warn_class : bool
If it is true, and the decorated is a class, it throws a warning
since there are some issues with inheritance of documentation in
classes.
@@ -64,48 +62,3 @@ def _wrapper(obj):
return dec(obj)
return _wrapper
-
-
-# =============================================================================
-# Deprecation
-# =============================================================================
-
-
-class SKCriteriaDeprecationWarning(DeprecationWarning):
- """Skcriteria deprecation warning."""
-
-
-# _ If the version of the warning is >= ERROR_GE the action is setted to
-# 'error', otherwise is 'once'.
-ERROR_GE = 1.0
-
-
-def deprecated(*, reason, version):
- """Mark functions, classes and methods as deprecated.
-
- It will result in a warning being emitted when the object is called,
- and the "deprecated" directive was added to the docstring.
-
- Parameters
- ----------
- reason: str
- Reason message which documents the deprecation in your library.
- version: str
- Version of your project which deprecates this feature.
- If you follow the `Semantic Versioning `_,
- the version number has the format "MAJOR.MINOR.PATCH".
-
- Notes
- -----
- This decorator is a thin layer over
- :py:func:`deprecated.deprecated`.
-
- Check: __
-
- """
- return _deprecated(
- reason=reason,
- version=version,
- category=SKCriteriaDeprecationWarning,
- action=("error" if version >= ERROR_GE else "once"),
- )
diff --git a/skcriteria/utils/lp.py b/skcriteria/utils/lp.py
index a0ea501..9b10de2 100644
--- a/skcriteria/utils/lp.py
+++ b/skcriteria/utils/lp.py
@@ -26,7 +26,7 @@
import pulp
from .bunch import Bunch
-from .decorators import doc_inherit
+from .doctools import doc_inherit
# =============================================================================
diff --git a/skcriteria/utils/unames.py b/skcriteria/utils/unames.py
new file mode 100644
index 0000000..a7bc12f
--- /dev/null
+++ b/skcriteria/utils/unames.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""Utility to achieve unique names for a collection of objects."""
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+from collections import Counter
+
+
+# =============================================================================
+# FUNCTIONS
+# =============================================================================
+
+
+def unique_names(*, names, elements):
+ """Generate names unique name.
+
+ Parameters
+ ----------
+ elements: iterable of size n
+ objects to be named
+ names: iterable of size n
+ names candidates
+
+ Returns
+ -------
+ list of tuples:
+ Returns a list where each element is a tuple.
+ Each tuple contains two elements: The first is the unique name of
+ the second.
+
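+    Examples
+    --------
+    A minimal sketch of the disambiguation behavior:
+
+    >>> unique_names(names=["a", "b", "a"], elements=[1, 2, 3])
+    [('a_1', 1), ('b', 2), ('a_2', 3)]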
+ """
+ # Based on sklearn.pipeline._name_estimators
+ if len(names) != len(elements):
+        raise ValueError("'names' and 'elements' must have the same length")
+
+ names = list(reversed(names))
+ elements = list(reversed(elements))
+
+ name_count = {k: v for k, v in Counter(names).items() if v > 1}
+
+ named_elements = []
+ for name, step in zip(names, elements):
+ count = name_count.get(name, 0)
+ if count:
+ name_count[name] = count - 1
+ name = f"{name}_{count}"
+
+ named_elements.append((name, step))
+
+ named_elements.reverse()
+
+ return named_elements
diff --git a/tests/cmp/test_ranks_cmp.py b/tests/cmp/test_ranks_cmp.py
new file mode 100644
index 0000000..b177630
--- /dev/null
+++ b/tests/cmp/test_ranks_cmp.py
@@ -0,0 +1,550 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# License: BSD-3 (https://tldrlegal.com/license/bsd-3-clause-license-(revised))
+# Copyright (c) 2016-2021, Cabral, Juan; Luczywo, Nadia
+# Copyright (c) 2022, QuatroPe
+# All rights reserved.
+
+# =============================================================================
+# DOCS
+# =============================================================================
+
+"""test for skcriteria.cmp.ranks_cmp
+
+"""
+
+
+# =============================================================================
+# IMPORTS
+# =============================================================================
+
+from matplotlib.testing.decorators import check_figures_equal
+
+import numpy as np
+
+import pandas as pd
+
+import pytest
+
+import seaborn as sns
+
+from skcriteria import madm
+from skcriteria.cmp import ranks_cmp
+
+# =============================================================================
+# TESTS
+# =============================================================================
+
+
+def test_Ranks_only_one_rank():
+ rank = madm.RankResult("test", ["a"], [1], {})
+ with pytest.raises(ValueError):
+ ranks_cmp.mkrank_cmp(rank)
+
+
+def test_Ranks_name_not_str():
+ rank = madm.RankResult("test", ["a"], [1], {})
+ with pytest.raises(ValueError):
+ ranks_cmp.RanksComparator([("a", rank), (1, rank)])
+
+
+def test_Ranks_not_rank_result():
+ rank = madm.RankResult("test", ["a"], [1], {})
+ with pytest.raises(TypeError):
+ ranks_cmp.RanksComparator([("a", rank), ("b", None)])
+
+
+def test_Ranks_duplicated_names():
+ rank = madm.RankResult("test", ["a"], [1], {})
+ with pytest.raises(ValueError):
+ ranks_cmp.RanksComparator([("a", rank), ("a", rank)])
+
+
+def test_RanksComparator_missing_alternatives():
+ rank0 = madm.RankResult("test", ["a"], [1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 2], {})
+ with pytest.raises(ValueError):
+ ranks_cmp.mkrank_cmp(rank0, rank1)
+
+
+def test_RanksComparator_repr():
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+    # assumed repr format: "<RanksComparator [ranks=[...]]>"
+    assert repr(rcmp) == "<RanksComparator [ranks=['test_1', 'test_2']]>"
+
+
+@pytest.mark.parametrize("untied", [True, False])
+def test_RanksComparator_to_dataframe(untied):
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ df = ranks_cmp.mkrank_cmp(rank0, rank1).to_dataframe(untied=untied)
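+    # both input ranks are named after their method ("test"), so the
+    # comparator disambiguates the columns as "test_1" and "test_2"; with
+    # untied=True the tie between "a" and "b" is broken into ranks 1 and 2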
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ pd.testing.assert_frame_equal(df, expected)
+
+
+@pytest.mark.parametrize("untied", [True, False])
+def test_RanksComparator_cov(untied):
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ cov = ranks_cmp.mkrank_cmp(rank0, rank1).cov(untied=untied)
+
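+    # with untied=True each column is the rank vector [1, 2], whose sample
+    # variance (and covariance) is 0.5; with the tie kept ([1, 1]) every
+    # entry of the covariance matrix collapses to 0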
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"test_1": 0.5, "test_2": 0.5}
+ if untied
+ else {"test_1": 0.0, "test_2": 0.0},
+ "test_2": {"test_1": 0.5, "test_2": 0.5}
+ if untied
+ else {"test_1": 0.0, "test_2": 0.0},
+ },
+ )
+
+ expected.columns.name = "Method"
+ expected.index.name = "Method"
+
+ pd.testing.assert_frame_equal(cov, expected)
+
+
+@pytest.mark.parametrize("untied", [True, False])
+def test_RanksComparator_corr(untied):
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ corr = ranks_cmp.mkrank_cmp(rank0, rank1).corr(untied=untied)
+
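+    # with untied=False both rank columns are constant ([1, 1]), so the
+    # Pearson correlation is undefined and pandas reports NaN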
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"test_1": 1.0, "test_2": 1.0}
+ if untied
+ else {"test_1": np.nan, "test_2": np.nan},
+ "test_2": {"test_1": 1.0, "test_2": 1.0}
+ if untied
+ else {"test_1": np.nan, "test_2": np.nan},
+ },
+ )
+
+ expected.columns.name = "Method"
+ expected.index.name = "Method"
+
+ pd.testing.assert_frame_equal(corr, expected)
+
+
+@pytest.mark.parametrize("untied", [True, False])
+def test_RanksComparator_r2_score(untied):
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ r2 = ranks_cmp.mkrank_cmp(rank0, rank1).r2_score(untied=untied)
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"test_1": 1.0, "test_2": 1.0},
+ "test_2": {"test_1": 1.0, "test_2": 1.0},
+ },
+ )
+
+ expected.columns.name = "Method"
+ expected.index.name = "Method"
+
+ pd.testing.assert_frame_equal(r2, expected)
+
+
+@pytest.mark.parametrize("untied", [True, False])
+def test_RanksComparator_distance(untied):
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ dis = ranks_cmp.mkrank_cmp(rank0, rank1).distance(untied=untied)
+
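+    # identical rankings differ in no position, so the pairwise Hamming
+    # distance between any two of them is 0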
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"test_1": 0.0, "test_2": 0.0},
+ "test_2": {"test_1": 0.0, "test_2": 0.0},
+ },
+ )
+
+ expected.columns.name = "Method"
+ expected.index.name = "Method"
+
+ pd.testing.assert_frame_equal(dis, expected)
+
+
+def test_RanksComparator_len():
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ assert len(ranks_cmp.mkrank_cmp(rank0, rank1)) == 2
+
+
+def test_RanksComparator_getitem():
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
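+    # ranks can be retrieved by name or by position, and a contiguous
+    # slice returns a new comparator over the same rank objects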
+ copy = rcmp[0:]
+
+ assert rank0 == rcmp["test_1"] == rcmp[0] == copy[0]
+ assert rank1 == rcmp["test_2"] == rcmp[1] == copy[1]
+
+ with pytest.raises(ValueError):
+ rcmp[0::2]
+
+ with pytest.raises(KeyError):
+ rcmp[object]
+
+
+def test_RanksComparator_hash():
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
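+    # RanksComparator hashes by identity, so hash(rcmp) equals id(rcmp)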
+ assert id(rcmp) == hash(rcmp)
+
+
+def test_RanksComparator_plot():
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ assert isinstance(rcmp.plot, ranks_cmp.RanksComparatorPlotter)
+ assert rcmp.plot._ranks_cmp is rcmp
+
+
+# =============================================================================
+# RanksComparatorPlotter
+# =============================================================================
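+# Each plotter test draws the same data twice: fig_test through the
+# RanksComparatorPlotter method under test, and fig_ref "by hand" with
+# seaborn/pandas; @check_figures_equal then renders both figures and
+# asserts that the resulting images are identical.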
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_flow(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.flow(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ sns.lineplot(data=expected.T, estimator=None, sort=False, ax=exp_ax)
+ exp_ax.grid(alpha=0.3)
+
+ exp_ax.set_ylabel(ranks_cmp.RANKS_LABELS[untied])
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_reg(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.reg(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ sns.regplot(
+ x="test_1",
+ y="test_2",
+ data=expected,
+ label="x=test_1, y=test_2 - $R^2=1$",
+ ax=exp_ax,
+ )
+
+ ranks_label = ranks_cmp.RANKS_LABELS[untied]
+ exp_ax.set(xlabel=f"'x' {ranks_label}", ylabel=f"'y' {ranks_label}")
+
+ exp_ax.legend()
+
+
+@pytest.mark.parametrize("untied", [True, False])
+def test_RanksComparatorPlotter_reg_unexpected_keyword_argument_color(untied):
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ with pytest.raises(TypeError):
+ rcmp.plot.reg(color="k", untied=untied)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_heatmap(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.heatmap(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ sns.heatmap(
+ expected,
+ annot=True,
+ cbar_kws={"label": ranks_cmp.RANKS_LABELS[untied]},
+ ax=exp_ax,
+ )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_corr(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.corr(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ sns.heatmap(
+ expected.corr(),
+ annot=True,
+ cbar_kws={"label": "Correlation"},
+ ax=exp_ax,
+ )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_cov(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.cov(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ sns.heatmap(
+ expected.cov(),
+ annot=True,
+ cbar_kws={"label": "Covariance"},
+ ax=exp_ax,
+ )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_r2_score(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.r2_score(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"test_1": 1.0, "test_2": 1.0},
+ "test_2": {"test_1": 1.0, "test_2": 1.0},
+ },
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Method"
+
+ sns.heatmap(
+ expected,
+ annot=True,
+ cbar_kws={"label": "$R^2$"},
+ ax=exp_ax,
+ )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_distance(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.distance(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"test_1": 0, "test_2": 0},
+ "test_2": {"test_1": 0, "test_2": 0},
+ },
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Method"
+
+ sns.heatmap(
+ expected,
+ annot=True,
+ cbar_kws={"label": "Hamming distance"},
+ ax=exp_ax,
+ )
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@pytest.mark.parametrize("orient", ["v", "h"])
+@check_figures_equal()
+def test_RanksComparatorPlotter_box(fig_test, fig_ref, untied, orient):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.box(ax=test_ax, orient=orient, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+    sns.boxplot(data=expected.T, orient=orient, ax=exp_ax)
+
+ ranks_label = ranks_cmp.RANKS_LABELS[untied]
+ if orient in (None, "v"):
+ exp_ax.set_ylabel(ranks_label)
+ else:
+ exp_ax.set_xlabel(ranks_label)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_bar(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.bar(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ expected.plot.bar(ax=exp_ax)
+
+ exp_ax.set_ylabel(ranks_cmp.RANKS_LABELS[untied])
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("untied", [True, False])
+@check_figures_equal()
+def test_RanksComparatorPlotter_barh(fig_test, fig_ref, untied):
+ test_ax = fig_test.subplots()
+
+ rank0 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rank1 = madm.RankResult("test", ["a", "b"], [1, 1], {})
+ rcmp = ranks_cmp.mkrank_cmp(rank0, rank1)
+
+ rcmp.plot.barh(ax=test_ax, untied=untied)
+
+ # EXPECTED
+ exp_ax = fig_ref.subplots()
+
+ expected = pd.DataFrame.from_dict(
+ {
+ "test_1": {"a": 1, "b": 2 if untied else 1},
+ "test_2": {"a": 1, "b": 2 if untied else 1},
+ }
+ )
+ expected.columns.name = "Method"
+ expected.index.name = "Alternatives"
+
+ expected.plot.barh(ax=exp_ax)
+
+ exp_ax.set_xlabel(ranks_cmp.RANKS_LABELS[untied])
diff --git a/tests/conftest.py b/tests/conftest.py
index 2dfca6c..438f0f6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -45,12 +45,14 @@
@pytest.fixture(scope="session")
def data_values():
def make(
+ *,
seed=None,
min_alternatives=3,
max_alternatives=10,
min_criteria=3,
max_criteria=10,
min_objectives_proportion=0.5,
+ nan_proportion=0,
):
# start the random generator
@@ -71,9 +73,15 @@ def make(
# create the data matrix with rows = alt and columns = crit
mtx = random.random((alternatives_number, criteria_number))
- # determine the number of minimize objectives bases on the proportion
- # of the total number of criteria, and the maximize is the complement
+    # if the nan proportion is > 0, scatter that fraction of NaN values
+    # at random positions in the matrix
+ if nan_proportion:
+ nan_number = round(mtx.size * float(nan_proportion))
+ nan_positions = random.choice(mtx.size, nan_number, replace=False)
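+        # ravel() returns a view of mtx, so assigning through it writes
+        # the NaN values into the original matrix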
+ mtx.ravel()[nan_positions] = np.nan
+ # determine the number of minimize objectives based on the proportion
+ # of the total number of criteria, and the maximize is the complement
min_objectives_number = round(
criteria_number * min_objectives_proportion
)
@@ -118,9 +126,9 @@ def make(
@pytest.fixture(scope="session")
def decision_matrix(data_values):
@functools.wraps(data_values)
- def make(*args, **kwargs):
+ def make(**kwargs):
mtx, objectives, weights, alternatives, criteria = data_values(
- *args, **kwargs
+ **kwargs
)
dm = core.mkdm(
diff --git a/tests/core/test_data.py b/tests/core/test_data.py
index 733f630..fe508b3 100644
--- a/tests/core/test_data.py
+++ b/tests/core/test_data.py
@@ -24,7 +24,7 @@
import pandas as pd
-from pyquery import PyQuery
+import pyquery
import pytest
@@ -37,37 +37,11 @@
def construct_iobjectives(arr):
- return [data.Objective.construct_from_alias(obj).value for obj in arr]
+ return [data.Objective.from_alias(obj).value for obj in arr]
def construct_objectives(arr):
- return [data.Objective.construct_from_alias(obj) for obj in arr]
-
-
-# =============================================================================
-# ENUM
-# =============================================================================
-
-
-def test_objective_construct():
- for alias in data.Objective._MAX_ALIASES.value:
- objective = data.Objective.construct_from_alias(alias)
- assert objective is data.Objective.MAX
- for alias in data.Objective._MIN_ALIASES.value:
- objective = data.Objective.construct_from_alias(alias)
- assert objective is data.Objective.MIN
- with pytest.raises(ValueError):
- data.Objective.construct_from_alias("no anda")
-
-
-def test_objective_str():
- assert str(data.Objective.MAX) == data.Objective.MAX.name
- assert str(data.Objective.MIN) == data.Objective.MIN.name
-
-
-def test_objective_to_string():
- assert data.Objective.MAX.to_string() == data.Objective._MAX_STR.value
- assert data.Objective.MIN.to_string() == data.Objective._MIN_STR.value
+ return [data.Objective.from_alias(obj) for obj in arr]
# =============================================================================
@@ -519,12 +493,80 @@ def test_DecisionMatrix_self_ne(data_values):
assert not dm.equals(other)
+# =============================================================================
+# SLICES
+# =============================================================================
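+# __getitem__, loc and iloc mirror the homonymous pandas accessors but
+# always return a new DecisionMatrix, so selecting a single label is
+# equivalent to selecting a one-element list with that label.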
+
+
+def test_DecisionMatrix__getitem__():
+ dm = data.mkdm(
+ matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+ objectives=[min, max, min],
+ weights=[0.1, 0.2, 0.3],
+ alternatives="A B C".split(),
+ criteria="X Y Z".split(),
+ )
+ assert dm["X"].equals(dm[["X"]])
+
+ expected = data.mkdm(
+ matrix=[[1, 3], [4, 6], [7, 9]],
+ objectives=[min, min],
+ weights=[0.1, 0.3],
+ alternatives="A B C".split(),
+ criteria="X Z".split(),
+ )
+ assert dm[["X", "Z"]].equals(expected)
+
+
+def test_DecisionMatrix_loc():
+ dm = data.mkdm(
+ matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+ objectives=[min, max, min],
+ weights=[0.1, 0.2, 0.3],
+ alternatives="A B C".split(),
+ criteria="X Y Z".split(),
+ )
+ assert dm.loc.name == "loc"
+ assert dm.loc["A"].equals(dm.loc[["A"]])
+
+ expected = data.mkdm(
+ matrix=[[1, 2, 3], [7, 8, 9]],
+ objectives=[min, max, min],
+ weights=[0.1, 0.2, 0.3],
+ alternatives="A C".split(),
+ criteria="X Y Z".split(),
+ )
+ assert dm.loc[["A", "C"]].equals(expected)
+
+
+def test_DecisionMatrix_iloc():
+ dm = data.mkdm(
+ matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+ objectives=[min, max, min],
+ weights=[0.1, 0.2, 0.3],
+ alternatives="A B C".split(),
+ criteria="X Y Z".split(),
+ )
+ assert dm.iloc.name == "iloc"
+ assert dm.iloc[2].equals(dm.iloc[[2]])
+
+ expected = data.mkdm(
+ matrix=[[1, 2, 3], [7, 8, 9]],
+ objectives=[min, max, min],
+ weights=[0.1, 0.2, 0.3],
+ alternatives="A C".split(),
+ criteria="X Y Z".split(),
+ )
+
+ assert dm.iloc[[0, 2]].equals(expected)
+
+
# =============================================================================
# REPR
# =============================================================================
-def test_mksm_simple_repr():
+def test_mkdm_simple_repr():
dm = data.mkdm(
matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -533,10 +575,10 @@ def test_mksm_simple_repr():
)
expected = (
- " C0[\u25bc 0.1] C1[\u25b2 0.2] C2[\u25bc 0.3]\n"
- "A0 1 2 3\n"
- "A1 4 5 6\n"
- "A2 7 8 9\n"
+ " C0[â–¼ 0.1] C1[â–² 0.2] C2[â–¼ 0.3]\n"
+ "A0 1 2 3\n"
+ "A1 4 5 6\n"
+ "A2 7 8 9\n"
"[3 Alternatives x 3 Criteria]"
)
@@ -544,18 +586,18 @@ def test_mksm_simple_repr():
assert result == expected
-def test_simple_html():
+def test_mkdm_simple_html():
dm = data.mkdm(
matrix=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
objectives=[min, max, min],
weights=[0.1, 0.2, 0.3],
)
- expected = PyQuery(
+ expected = pyquery.PyQuery(
"""