diff --git a/.travis.yml b/.travis.yml index 984b5dbae8..59a3ec42a4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,14 +55,11 @@ before_install: - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" - if [[ `which conda` ]]; then echo 'Conda installation successful'; else exit 1; fi - - conda update --yes conda - conda create -n testenv --yes python=$PYTHON_VERSION pip wheel nose - source activate testenv - conda install --yes gcc swig - echo "Using GCC at "`which gcc` - export CC=`which gcc` - # Fixes version `GLIBCXX_3.4.21' not found (on Ubuntu 16.04) - - conda install --yes libgcc install: # Install general requirements the way setup.py suggests diff --git a/autosklearn/__init__.py b/autosklearn/__init__.py index 88c77b79c9..ad2c39be8a 100644 --- a/autosklearn/__init__.py +++ b/autosklearn/__init__.py @@ -6,10 +6,10 @@ __MANDATORY_PACKAGES__ = ''' numpy>=1.9 scikit-learn>=0.18.1,<0.19 -smac==0.5.0 lockfile>=0.10 +smac>=0.6.0,<0.7 +pyrfr>=0.6.1,<0.7 ConfigSpace>=0.3.3,<0.4 -pyrfr>=0.4.0,<0.5 ''' dependencies.verify_packages(__MANDATORY_PACKAGES__) diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py index b1018ef92c..a77e87e62f 100644 --- a/autosklearn/evaluation/__init__.py +++ b/autosklearn/evaluation/__init__.py @@ -72,8 +72,14 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric, eval_function = functools.partial(fit_predict_try_except_decorator, ta=eval_function) - super().__init__(ta=eval_function, stats=stats, runhistory=runhistory, - run_obj=run_obj, par_factor=par_factor) + super().__init__( + ta=eval_function, + stats=stats, + runhistory=runhistory, + run_obj=run_obj, + par_factor=par_factor, + cost_for_crash=WORST_POSSIBLE_RESULT, + ) self.backend = backend self.autosklearn_seed = autosklearn_seed diff --git a/autosklearn/smbo.py b/autosklearn/smbo.py index 25ce79320b..8ff8382148 100644 --- a/autosklearn/smbo.py +++ b/autosklearn/smbo.py @@ 
-450,42 +450,6 @@ def run_smbo(self): (1, -1)) self.logger.info(list(meta_features_dict.keys())) - # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric]) - # meta_runs_index = 0 - # try: - # meta_durations = meta_base.get_all_runs('runtime') - # read_runtime_data = True - # except KeyError: - # read_runtime_data = False - # self.logger.critical('Cannot read runtime data.') - # if self.acquisition_function == 'EIPS': - # self.logger.critical('Reverting to acquisition function EI!') - # self.acquisition_function = 'EI' - - # for meta_dataset in meta_runs.index: - # meta_dataset_start_index = meta_runs_index - # for meta_configuration in meta_runs.columns: - # if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]): - # try: - # config = meta_base.get_configuration_from_algorithm_index( - # meta_configuration) - # cost = meta_runs.loc[meta_dataset, meta_configuration] - # if read_runtime_data: - # runtime = meta_durations.loc[meta_dataset, - # meta_configuration] - # else: - # runtime = 1 - # # TODO read out other status types! 
- # meta_runhistory.add(config, cost, runtime, - # StatusType.SUCCESS, - # instance_id=meta_dataset) - # meta_runs_index += 1 - # except: - # # TODO maybe add warning - # pass - # - # meta_runs_dataset_indices[meta_dataset] = ( - # meta_dataset_start_index, meta_runs_index) else: meta_features = None self.logger.warning('Could not find meta-data directory %s' % @@ -514,13 +478,13 @@ def run_smbo(self): startup_time = self.watcher.wall_elapsed(self.dataset_name) total_walltime_limit = self.total_walltime_limit - startup_time - 5 scenario_dict = {'cs': self.config_space, - 'cutoff-time': self.func_eval_time_limit, - 'memory-limit': self.memory_limit, - 'wallclock-limit': total_walltime_limit, + 'cutoff_time': self.func_eval_time_limit, + 'memory_limit': self.memory_limit, + 'wallclock_limit': total_walltime_limit, 'output-dir': self.backend.get_smac_output_directory(self.seed), 'shared-model': self.shared_mode, - 'run-obj': 'quality', + 'run_obj': 'quality', 'deterministic': 'true', 'instances': instances} @@ -631,31 +595,6 @@ def run_smbo(self): else: raise ValueError(self.configuration_mode) - # Build a runtime model - # runtime_rf = RandomForestWithInstances(types, - # instance_features=meta_features_list, - # seed=1, num_trees=10) - # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params, - # scenario=self.scenario, - # success_states=None, - # impute_censored_data=False, - # impute_state=None) - # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory) - # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten()) - # X_meta, Y_meta = rh2EPM.transform(meta_runhistory) - # # Transform Y_meta on a per-dataset base - # for meta_dataset in meta_runs_dataset_indices: - # start_index, end_index = meta_runs_dataset_indices[meta_dataset] - # end_index += 1 # Python indexing - # Y_meta[start_index:end_index, 0]\ - # [Y_meta[start_index:end_index, 0] >2.0] = 2.0 - # dataset_minimum = np.min(Y_meta[start_index:end_index, 0]) - # Y_meta[start_index:end_index, 0] = 
1 - ( - # (1. - Y_meta[start_index:end_index, 0]) / - # (1. - dataset_minimum)) - # Y_meta[start_index:end_index, 0]\ - # [Y_meta[start_index:end_index, 0] > 2] = 2 - smac.solver.stats.start_timing() # == first, evaluate all metelearning and default configurations smac.solver.incumbent = smac.solver.initial_design.run() @@ -670,9 +609,10 @@ def run_smbo(self): time_bound=self.total_walltime_limit) if smac.solver.scenario.shared_model: - pSMAC.write(run_history=smac.solver.runhistory, - output_directory=smac.solver.scenario.output_dir, - num_run=self.seed) + pSMAC.write( + run_history=smac.solver.runhistory, + output_directory=smac.solver.scenario.output_dir, + ) if smac.solver.stats.is_budget_exhausted(): break @@ -687,14 +627,7 @@ def run_smbo(self): logger=self.logger) choose_next_start_time = time.time() - try: - challengers = self.choose_next(smac) - except Exception as e: - self.logger.error(e) - self.logger.error("Error in getting next configurations " - "with SMAC. Using random configuration!") - next_config = self.config_space.sample_configuration() - challengers = [next_config] + challengers = self.choose_next(smac) time_for_choose_next = time.time() - choose_next_start_time self.logger.info('Used %g seconds to find next ' 'configurations' % (time_for_choose_next)) @@ -708,9 +641,10 @@ def run_smbo(self): time_bound=time_for_choose_next) if smac.solver.scenario.shared_model: - pSMAC.write(run_history=smac.solver.runhistory, - output_directory=smac.solver.scenario.output_dir, - num_run=self.seed) + pSMAC.write( + run_history=smac.solver.runhistory, + output_directory=smac.solver.scenario.output_dir, + ) if smac.solver.stats.is_budget_exhausted(): break @@ -737,18 +671,8 @@ def choose_next(self, smac): (1. 
- dataset_minimum)) Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2 - # if len(X_meta) > 0 and len(X_cfg) > 0: - # pass - # X_cfg = np.concatenate((X_meta, X_cfg)) - # Y_cfg = np.concatenate((Y_meta, Y_cfg)) - # elif len(X_meta) > 0: - # X_cfg = X_meta.copy() - # Y_cfg = Y_meta.copy() - # elif len(X_cfg) > 0: X_cfg = X_cfg.copy() Y_cfg = Y_cfg.copy() - # else: - # raise ValueError('No training data for SMAC random forest!') self.logger.info('Using %d training points for SMAC.' % X_cfg.shape[0]) diff --git a/ci_scripts/circle_install.sh b/ci_scripts/circle_install.sh index fc4d372f81..d7c1aa0162 100644 --- a/ci_scripts/circle_install.sh +++ b/ci_scripts/circle_install.sh @@ -6,6 +6,7 @@ # here. source activate testenv +export CC=`which gcc` # install documentation building dependencies pip install --upgrade numpy pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc @@ -16,4 +17,4 @@ python setup.py clean python setup.py develop # pipefail is necessary to propagate exit codes -set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt \ No newline at end of file +set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt diff --git a/circle.yml b/circle.yml index 884024a258..f0a372e043 100644 --- a/circle.yml +++ b/circle.yml @@ -26,10 +26,11 @@ dependencies: - sudo -E apt-get -yq remove texlive-binaries --purge - sudo -E apt-get -yq update - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra + # Other stuff... 
+ - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential # Conda installation - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh - bash ~/miniconda.sh -b -p $HOME/miniconda - - conda update --yes conda - conda create -n testenv --yes python=3.6 pip wheel nose gcc swig # The --user is needed to let sphinx see the source and the binaries diff --git a/doc/installation.rst b/doc/installation.rst index 4ffe17ab28..c314617d48 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -58,7 +58,7 @@ the Section `Installing auto-sklearn`_. A common installation problem under recent Linux distribution is the incompability of the compiler version used to compile the Python binary shipped by AnaConda and the compiler installed by the distribution. This can -be solved by istalling the *gcc* compiler shipped with AnaConda (as well as +be solved by installing the *gcc* compiler shipped with AnaConda (as well as *swig*): .. code:: bash diff --git a/requirements.txt b/requirements.txt index 378b70108a..528a12b365 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,5 @@ pandas ConfigSpace>=0.3.3,<0.4 pynisher>=0.4 -pyrfr>=0.4.0,<0.5 -smac==0.5.0 +pyrfr>=0.6.1,<0.7 +smac>=0.6.0,<0.7 diff --git a/scripts/run_auto-sklearn_for_metadata_generation.py b/scripts/run_auto-sklearn_for_metadata_generation.py index 712a0f80ef..fe9a616c64 100644 --- a/scripts/run_auto-sklearn_for_metadata_generation.py +++ b/scripts/run_auto-sklearn_for_metadata_generation.py @@ -94,7 +94,12 @@ config = entry.incumbent logger = logging.getLogger('Testing:)') - stats = Stats(Scenario({'cutoff_time': per_run_time_limit * 2})) + stats = Stats( + Scenario({ + 'cutoff_time': per_run_time_limit * 2, + 'run_obj': 'quality', + }) + ) stats.start_timing() # To avoid the output "first run crashed"... 
stats.ta_runs += 1 diff --git a/setup.py b/setup.py index 9efae0dd41..1ccea1a2ed 100644 --- a/setup.py +++ b/setup.py @@ -26,9 +26,9 @@ "liac-arff", "pandas", "ConfigSpace>=0.3.3,<0.4", - "pynisher>=0.4", - "pyrfr>=0.4,<0.5", - "smac==0.5.0" + "pynisher>=0.4,<0.5", + "pyrfr>=0.6.1,<0.7", + "smac>=0.6.0,<0.7" ] with open("autosklearn/__version__.py") as fh: diff --git a/test/test_automl/test_smbo.py b/test/test_automl/test_smbo.py index c8d9a27faf..7094e9c51a 100644 --- a/test/test_automl/test_smbo.py +++ b/test/test_automl/test_smbo.py @@ -28,18 +28,24 @@ def test_choose_next(self): total_walltime_limit=total_walltime_limit, memory_limit=memory_limit, watcher=None, - metric=accuracy) + metric=accuracy + ) auto.config_space = configspace - scenario = Scenario({'cs': configspace, - 'cutoff-time': func_eval_time_limit, - 'wallclock-limit': total_walltime_limit, - 'memory-limit': memory_limit, - 'run-obj': 'quality'}) + scenario = Scenario({ + 'cs': configspace, + 'cutoff_time': func_eval_time_limit, + 'wallclock_limit': total_walltime_limit, + 'memory_limit': memory_limit, + 'run_obj': 'quality', + }) smac = SMAC(scenario) - self.assertRaisesRegex(ValueError, 'Cannot use SMBO algorithm on ' - 'empty runhistory', - auto.choose_next, smac) + self.assertRaisesRegex( + ValueError, + 'Cannot use SMBO algorithm on empty runhistory', + auto.choose_next, + smac + ) config = Configuration(configspace, values={'a': 0.1, 'b': 0.2}) # TODO make sure the incumbent is always set? 
diff --git a/test/test_metalearning/pyMetaLearn/test_meta_features.py b/test/test_metalearning/pyMetaLearn/test_meta_features.py index d8fbc0e397..0634298ec1 100644 --- a/test/test_metalearning/pyMetaLearn/test_meta_features.py +++ b/test/test_metalearning/pyMetaLearn/test_meta_features.py @@ -1,13 +1,15 @@ +import os +import tempfile from six import StringIO from unittest import TestCase import unittest -import os import arff import numpy as np import scipy.sparse from sklearn.preprocessing.imputation import Imputer from sklearn.datasets import make_multilabel_classification +from sklearn.externals.joblib import Memory from autosklearn.pipeline.implementations.OneHotEncoder import OneHotEncoder from sklearn.preprocessing import StandardScaler @@ -81,12 +83,16 @@ def tearDown(self): os.chdir(self.cwd) def get_multilabel(self): - return make_multilabel_classification(n_samples=100, - n_features=10, - n_classes=5, - n_labels=5, - return_indicator=True, - random_state=1) + cache = Memory(cachedir=tempfile.gettempdir()) + cached_func = cache.cache(make_multilabel_classification) + return cached_func( + n_samples=100, + n_features=10, + n_classes=5, + n_labels=5, + return_indicator=True, + random_state=1 + ) def test_number_of_instance(self): mf = self.mf["NumberOfInstances"](self.X, self.y, self.categorical) diff --git a/test/test_pipeline/components/classification/test_liblinear.py b/test/test_pipeline/components/classification/test_liblinear.py index 6eefda8c11..ca61b3f44e 100644 --- a/test/test_pipeline/components/classification/test_liblinear.py +++ b/test/test_pipeline/components/classification/test_liblinear.py @@ -16,6 +16,7 @@ class LibLinearComponentTest(BaseClassificationComponentTest): res["default_iris_proba"] = 0.33728319465089696 res["default_iris_sparse"] = 0.56 res["default_digits"] = 0.91499696417729204 + res['default_digits_places'] = 2 res["default_digits_iterative"] = -1 res["default_digits_binary"] = 0.98907103825136611 
res["default_digits_multilabel"] = 0.89539354612444322 diff --git a/test/test_pipeline/components/regression/test_base.py b/test/test_pipeline/components/regression/test_base.py index f33ed5ecd0..508ece98cc 100644 --- a/test/test_pipeline/components/regression/test_base.py +++ b/test/test_pipeline/components/regression/test_base.py @@ -1,12 +1,12 @@ import unittest +import numpy as np +import sklearn.metrics + from autosklearn.pipeline.util import _test_regressor, \ _test_regressor_iterative_fit - from autosklearn.pipeline.constants import * -import sklearn.metrics - class BaseRegressionComponentTest(unittest.TestCase): @@ -35,11 +35,18 @@ def test_default_boston(self): y_pred=predictions), self.res["default_boston_le_ge"][1]) else: - self.assertAlmostEqual(self.res["default_boston"], - sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_boston_places", 7)) + score = sklearn.metrics.r2_score(targets, predictions) + fixture = self.res["default_boston"] + + if score < -1e10: + score = np.log(-score) + fixture = np.log(-fixture) + + self.assertAlmostEqual( + fixture, + score, + places=self.res.get("default_boston_places", 7), + ) def test_default_boston_iterative_fit(self): if not hasattr(self.module, 'iterative_fit'): @@ -49,11 +56,18 @@ def test_default_boston_iterative_fit(self): predictions, targets = \ _test_regressor_iterative_fit(dataset="boston", Regressor=self.module) - self.assertAlmostEqual(self.res["default_boston_iterative"], - sklearn.metrics.r2_score(targets, - predictions), - places=self.res.get( - "default_boston_iterative_places", 7)) + score = sklearn.metrics.r2_score(targets, predictions) + fixture = self.res["default_boston_iterative"] + + if score < -1e10: + score = np.log(-score) + fixture = np.log(-fixture) + + self.assertAlmostEqual( + fixture, + score, + places=self.res.get("default_boston_iterative_places", 7), + ) def test_default_boston_iterative_sparse_fit(self): if not hasattr(self.module, 
'iterative_fit'): diff --git a/test/test_pipeline/components/regression/test_liblinear_svr.py b/test/test_pipeline/components/regression/test_liblinear_svr.py index 55608c8f04..42b73bfba7 100644 --- a/test/test_pipeline/components/regression/test_liblinear_svr.py +++ b/test/test_pipeline/components/regression/test_liblinear_svr.py @@ -10,8 +10,10 @@ class SupportVectorComponentTest(BaseRegressionComponentTest): res = dict() res["default_boston"] = 0.6768297818275556 + res["default_boston_places"] = 2 res["default_boston_iterative"] = None res["default_boston_sparse"] = 0.12626519114138912 + res["default_boston_sparse_places"] = 2 res["default_boston_iterative_sparse"] = None res["default_diabetes"] = 0.39152218711865661 res["default_diabetes_iterative"] = None diff --git a/test/test_pipeline/test_classification.py b/test/test_pipeline/test_classification.py index bd99b3fd58..bc0a2a03c7 100644 --- a/test/test_pipeline/test_classification.py +++ b/test/test_pipeline/test_classification.py @@ -1,7 +1,7 @@ import copy import os import resource -import sys +import tempfile import traceback import unittest import unittest.mock @@ -13,7 +13,7 @@ import sklearn.ensemble import sklearn.svm from sklearn.utils.testing import assert_array_almost_equal -#from xgboost.core import XGBoostError +from sklearn.externals.joblib import Memory from ConfigSpace.configuration_space import ConfigurationSpace, \ Configuration @@ -153,18 +153,22 @@ def test_repr(self): self.assertIsInstance(cls, SimpleClassificationPipeline) def test_multilabel(self): - - X, Y = sklearn.datasets.\ - make_multilabel_classification(n_samples=150, - n_features=20, - n_classes=5, - n_labels=2, - length=50, - allow_unlabeled=True, - sparse=False, - return_indicator=True, - return_distributions=False, - random_state=1) + cache = Memory(cachedir=tempfile.gettempdir()) + cached_func = cache.cache( + sklearn.datasets.make_multilabel_classification + ) + X, Y = cached_func( + n_samples=150, + n_features=20, + 
n_classes=5, + n_labels=2, + length=50, + allow_unlabeled=True, + sparse=False, + return_indicator=True, + return_distributions=False, + random_state=1 + ) X_train = X[:100, :] Y_train = Y[:100, :] X_test = X[101:, :]