Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for scikit-learn 0.24 #77

Open
wants to merge 35 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
539380c
Fix imports for scikit-learn 0.24
gfournier Feb 1, 2021
1c37732
fix import sklearn 0.24
LionelMassoulard Mar 1, 2021
a417df0
explicit catch of 'ImportError'
LionelMassoulard Mar 2, 2021
ce31a90
make random_state None when shuffle is False
LionelMassoulard Mar 2, 2021
15382f9
fix test to unpack or not
LionelMassoulard Mar 2, 2021
b3eef93
remove old 'presort' attribute
LionelMassoulard Mar 2, 2021
c023741
fix test : ValueError or nan everywhere
LionelMassoulard Mar 2, 2021
f9d3add
fix longtest flag
LionelMassoulard Mar 3, 2021
801394f
FeatureSelector : fix random_state
LionelMassoulard Mar 3, 2021
63bf1fe
Update scikit-learn/pandas version for tests
gfournier Jan 27, 2022
047b101
fix typo when raise
LionelMassoulard Jan 27, 2022
09e81ce
remove useless imports
LionelMassoulard Jan 27, 2022
a2c49d8
allow test if graphviz executable not installed
LionelMassoulard Jan 27, 2022
7dc3d18
fix test sparse vs non sparse
LionelMassoulard Jan 27, 2022
908e5a1
fix tests
LionelMassoulard Jan 27, 2022
c8ca386
fix compat old / new gensim
LionelMassoulard Jan 27, 2022
34d6caf
remove requirement scipy < 1.6.0
LionelMassoulard Jan 27, 2022
0d9ea57
compat old new gensim
LionelMassoulard Jan 27, 2022
e92c4d7
fix block manager to allow safe_indexing to work
LionelMassoulard Jan 29, 2022
c5f5810
use "fit_predict" instead of fit
LionelMassoulard Jan 29, 2022
1551d4b
remove 'min_impurity_split' argument
LionelMassoulard Jan 29, 2022
9ef0b6c
* remove 'precompute_distances'
LionelMassoulard Jan 29, 2022
8e34548
fill with 0 by default
LionelMassoulard Jan 29, 2022
4827907
don't sparsify if already sparse, don't copy if all sparse
LionelMassoulard Jan 29, 2022
579958e
* fix test in case of composition
LionelMassoulard Jan 29, 2022
be1df5c
allow argument to be passed to linear klass
LionelMassoulard Jan 31, 2022
fa052b2
fix solver for test
LionelMassoulard Jan 31, 2022
c92637a
add other_linear_params
LionelMassoulard Jan 31, 2022
8299e6e
simplify test
LionelMassoulard Jan 31, 2022
18443ee
change solver
LionelMassoulard Jan 31, 2022
0edae89
400 observation at least
LionelMassoulard Jan 31, 2022
1b5bd33
allow other parameter to be passed to truncated svd
LionelMassoulard Jan 31, 2022
e21a6a9
fix test : force 'n_iter' to 1 so that it doesn't create an error
LionelMassoulard Jan 31, 2022
69452e9
change default solver
LionelMassoulard Jan 31, 2022
9f96a92
fix : pass correct attribute
LionelMassoulard Jan 31, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
scikitlearn-version: [0.21.3, 0.22.2, 0.23.1]
scikitlearn-version: [0.21.3, 0.22.2, 0.23.2, 0.24.1]
pandas-version: [0.25.3, 1.0.5]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe bump the latest pandas version in the matrix to 1.2.2?


steps:
Expand Down
5 changes: 3 additions & 2 deletions aikit/cross_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,18 @@ def create_cv(cv=3, y=None, classifier=False, shuffle=False, random_state=None):
if cv is None:
cv = 3

random_state_ = random_state if shuffle else None
if isinstance(cv, sklearn.model_selection._split.numbers.Integral):
if (
classifier
and (y is not None)
and (sklearn.model_selection._split.type_of_target(y) in ("binary", "multiclass"))
):

return sklearn.model_selection.StratifiedKFold(cv, shuffle=shuffle, random_state=random_state)
return sklearn.model_selection.StratifiedKFold(cv, shuffle=shuffle, random_state=random_state_)

else:
return sklearn.model_selection.KFold(cv, shuffle=shuffle, random_state=random_state)
return sklearn.model_selection.KFold(cv, shuffle=shuffle, random_state=random_state_)

if not hasattr(cv, "split") or isinstance(cv, str):
if not isinstance(cv, sklearn.model_selection._split.Iterable) or isinstance(cv, str):
Expand Down
17 changes: 9 additions & 8 deletions aikit/models/rotation_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,16 @@
from sklearn.exceptions import NotFittedError
from sklearn.base import ClassifierMixin, BaseEstimator, TransformerMixin, RegressorMixin

from sklearn.ensemble.forest import ForestClassifier, ForestRegressor
try:
from sklearn.ensemble.forest import ForestClassifier, ForestRegressor
except ImportError:
from sklearn.ensemble._forest import ForestClassifier, ForestRegressor

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.tree.tree import DTYPE
try:
from sklearn.tree.tree import DTYPE
except ImportError:
from sklearn.tree._tree import DTYPE


from sklearn.preprocessing import StandardScaler
Expand Down Expand Up @@ -182,7 +189,6 @@ def __init__(
min_impurity_decrease=0.0,
min_impurity_split=None,
class_weight=None,
presort=False,
pca_bootstrap=False,
pca_max_nb_groups=0.25,
pca_max_group_size=0.05,
Expand All @@ -200,7 +206,6 @@ def __init__(
self.min_impurity_decrease = min_impurity_decrease
self.min_impurity_split = min_impurity_split
self.class_weight = class_weight
self.presort = presort

self.pca_bootstrap = pca_bootstrap
self.pca_max_nb_groups = pca_max_nb_groups
Expand Down Expand Up @@ -237,7 +242,6 @@ def fit(self, X, y, sample_weight=None, check_input=True, X_idx_sorted=None):
random_state=self.random_state,
min_impurity_decrease=self.min_impurity_decrease,
min_impurity_split=self.min_impurity_split,
presort=self.presort,
)

# 3) Apply group PCA
Expand Down Expand Up @@ -329,7 +333,6 @@ def __init__(
max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
presort=False,
pca_bootstrap=False,
pca_max_nb_groups=0.25,
pca_max_group_size=0.05,
Expand All @@ -346,7 +349,6 @@ def __init__(
self.max_leaf_nodes = max_leaf_nodes
self.min_impurity_decrease = min_impurity_decrease
self.min_impurity_split = min_impurity_split
self.presort = presort

self.pca_bootstrap = pca_bootstrap
self.pca_max_nb_groups = pca_max_nb_groups
Expand Down Expand Up @@ -379,7 +381,6 @@ def fit(self, X, y, sample_weight=None, check_input=True, X_idx_sorted=None):
random_state=self.random_state,
min_impurity_decrease=self.min_impurity_decrease,
min_impurity_split=self.min_impurity_split,
presort=self.presort,
)

# 3) Apply group PCA
Expand Down
15 changes: 12 additions & 3 deletions aikit/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,32 @@
logger = logging.getLogger(__name__)

import sklearn.metrics
from sklearn.metrics.regression import _check_reg_targets, r2_score

try:
from sklearn.metrics.regression import _check_reg_targets, r2_score
except ImportError:
from sklearn.metrics import r2_score
from sklearn.metrics._regression import _check_reg_targets

from sklearn.metrics import silhouette_score, davies_bouldin_score
try:
from sklearn.metrics import calinski_harabasz_score
except ImportError:
from sklearn.metrics import calinski_harabaz_score
calinski_harabasz_score = calinski_harabaz_score


from sklearn.metrics.scorer import SCORERS, _BaseScorer, type_of_target
try:
from sklearn.metrics.scorer import SCORERS, _BaseScorer, type_of_target
except ImportError:
from sklearn.metrics._scorer import SCORERS, _BaseScorer, type_of_target


import numpy as np
import pandas as pd

from functools import partial


class log_loss_scorer_patched(object):
""" Log Loss scorer, correcting a small issue in sklearn (labels not used) """

Expand Down
7 changes: 6 additions & 1 deletion aikit/tools/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@
from io import StringIO
import hashlib

from sklearn.utils import check_random_state, safe_indexing
from sklearn.utils import check_random_state
try:
from sklearn.utils import safe_indexing
except ImportError:
from sklearn.utils import _safe_indexing
safe_indexing = _safe_indexing

from aikit.tools.json_helper import SpecialJSONEncoder

Expand Down
31 changes: 20 additions & 11 deletions aikit/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@

from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin
from sklearn.exceptions import NotFittedError
from sklearn.metrics.scorer import _BaseScorer, _PredictScorer

try:
from sklearn.metrics.scorer import _BaseScorer, _PredictScorer
import sklearn.metrics.scorer as sk_scorer
except ImportError:
from sklearn.metrics._scorer import _BaseScorer, _PredictScorer
import sklearn.metrics._scorer as sk_scorer

from sklearn.utils import check_random_state
from sklearn.utils.multiclass import type_of_target
Expand All @@ -33,7 +38,6 @@

from sklearn.utils.metaestimators import if_delegate_has_method
from sklearn.cluster import KMeans
import sklearn.metrics.scorer

# from aikit.helper_functions import is_user
from aikit.enums import DataTypes
Expand Down Expand Up @@ -314,6 +318,7 @@ def __init__(
self.n_components = n_components
self.selector_type = selector_type
self.component_selection = component_selection
self.random_state=random_state
self.model_params = model_params
self.columns_to_use = columns_to_use
self.regex_match = regex_match
Expand All @@ -339,6 +344,7 @@ def _get_model(self, X, y=None):
component_selection=self.component_selection,
selector_type=self.selector_type,
model_params=self.model_params,
random_state=self.random_state
)


Expand All @@ -350,6 +356,7 @@ def __init__(
n_components=0.5,
selector_type="forest",
component_selection="number",
random_state=None,
model_params=None,
columns_to_use="all",
regex_match=False,
Expand All @@ -359,6 +366,7 @@ def __init__(
self.n_components = n_components
self.selector_type = selector_type
self.component_selection = component_selection
self.random_state=random_state
self.model_params = model_params
self.columns_to_use = columns_to_use
self.regex_match = regex_match
Expand All @@ -384,6 +392,7 @@ def _get_model(self, X, y=None):
component_selection=self.component_selection,
selector_type=self.selector_type,
model_params=self.model_params,
random_state=self.random_state
)


Expand Down Expand Up @@ -1008,15 +1017,15 @@ def _make_scorer(self, score_name):
if isinstance(score_name, str):

score_fun_dico = {
"explained_variance": sklearn.metrics.scorer.explained_variance_score,
"r2": sklearn.metrics.scorer.r2_score,
"neg_median_absolute_error": sklearn.metrics.scorer.median_absolute_error,
"neg_mean_absolute_error": sklearn.metrics.scorer.mean_absolute_error,
"neg_mean_squared_error": sklearn.metrics.scorer.mean_squared_error,
"neg_mean_squared_log_error": sklearn.metrics.scorer.mean_squared_log_error,
"median_absolute_error": sklearn.metrics.scorer.median_absolute_error,
"mean_absolute_error": sklearn.metrics.scorer.mean_absolute_error,
"mean_squared_error": sklearn.metrics.scorer.mean_squared_error,
"explained_variance": sk_scorer.explained_variance_score,
"r2": sk_scorer.r2_score,
"neg_median_absolute_error": sk_scorer.median_absolute_error,
"neg_mean_absolute_error": sk_scorer.mean_absolute_error,
"neg_mean_squared_error": sk_scorer.mean_squared_error,
"neg_mean_squared_log_error": sk_scorer.mean_squared_log_error,
"median_absolute_error": sk_scorer.median_absolute_error,
"mean_absolute_error": sk_scorer.mean_absolute_error,
"mean_squared_error": sk_scorer.mean_squared_error,
}

greater_is_better = {
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
scikit-learn>=0.20
pandas>=0.23
numpy
scipy
scipy<1.6.0 # https://github.com/scikit-optimize/scikit-optimize/issues/981
statsmodels
lockfile
decorator
Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_model_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import scipy.sparse as sps

from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.datasets.samples_generator import make_blobs
from sklearn.datasets import make_blobs

from aikit.models import DBSCANWrapper, KMeansWrapper, AgglomerativeClusteringWrapper

Expand Down
4 changes: 1 addition & 3 deletions tests/models/test_rotation_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,7 @@ def test_GroupPCADecisionTreeClassifier():
)


pytest.mark.longtest


@pytest.mark.longtest
@pytest.mark.parametrize(
"random_state, max_depth, criterion, pca_bootstrap",
list(itertools.product(range(100), (None, 2, 5), ("gini", "entropy"), (True, False))),
Expand Down
18 changes: 15 additions & 3 deletions tests/test_cross_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,13 @@ def test_fit_and_predict_transfrom():
for train, test in cv.split(X, y):

pt = DebugPassThrough()
predictions, _ = sklearn.model_selection._validation._fit_and_predict(
temp = sklearn.model_selection._validation._fit_and_predict(
pt, X, y, train, test, verbose=1, fit_params=None, method="transform"
)
if isinstance(temp, tuple):
predictions = temp[0]
else:
predictions = temp

assert predictions.shape[0] == test.shape[0]
assert predictions.shape[1] == X.shape[1]
Expand All @@ -138,9 +142,13 @@ def test_fit_and_predict_predict():
for train, test in cv.split(X, y):

logit = LogisticRegression()
predictions, _ = sklearn.model_selection._validation._fit_and_predict(
temp = sklearn.model_selection._validation._fit_and_predict(
logit, X, y, train, test, verbose=1, fit_params=None, method="predict"
)
if isinstance(temp, tuple):
predictions=temp[0]
else:
predictions=temp

assert predictions.shape[0] == test.shape[0]
assert len(predictions.shape) == 1
Expand All @@ -157,9 +165,13 @@ def test_fit_and_predict_predict_proba():
for train, test in cv.split(X, y):

logit = LogisticRegression()
predictions, _ = sklearn.model_selection._validation._fit_and_predict(
temp = sklearn.model_selection._validation._fit_and_predict(
logit, X, y, train, test, verbose=1, fit_params=None, method="predict_proba"
)
if isinstance(temp, tuple):
predictions=temp[0]
else:
predictions=temp

assert predictions.shape[0] == test.shape[0]
assert predictions.shape[1] == 2
Expand Down
18 changes: 13 additions & 5 deletions tests/test_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,13 @@ def test_avg_roc_auc_scorer_aikit():

assert np.abs(cv_res1 - cv_res2).max() <= 10 ** (-5)

with pytest.raises(ValueError):
cross_val_score(logit, X, y, cv=cv, scoring="roc_auc") # sklearn doesn't handle that

try:
res = cross_val_score(logit, X, y, cv=cv, scoring="roc_auc") # sklearn doesn't handle that
except ValueError:
res = None
assert res is None or pd.isnull(res).all()
# sklearn <= 0.23 raises ValueError; sklearn >= 0.24 returns only 'nan' scores

cv_res_aikit = cross_val_score(logit, X, 1 * (y == "AA"), cv=cv, scoring="avg_roc_auc")
cv_res_sklearn = cross_val_score(logit, X, 1 * (y == "AA"), cv=cv, scoring="roc_auc")

Expand All @@ -107,8 +111,12 @@ def test_average_precision_scorer_aikit():

assert np.abs(cv_res1 - cv_res2).max() <= 10 ** (-5)

with pytest.raises(ValueError):
cross_val_score(logit, X, y, cv=cv, scoring="average_precision") # sklearn doesn't handle that
try:
res = cross_val_score(logit, X, y, cv=cv, scoring="average_precision") # sklearn doesn't handle that
except ValueError:
res = None
assert res is None or pd.isnull(res).all()
# sklearn <= 0.23 raises ValueError; sklearn >= 0.24 returns only 'nan' scores


def test_log_loss_patched_multioutput():
Expand Down