Merge pull request #357 from automl/development

Development

mfeurer authored Oct 2, 2017
2 parents f4b72be + 03879f4 commit 0ee3699

Showing 16 changed files with 116 additions and 148 deletions.
3 changes: 0 additions & 3 deletions .travis.yml
@@ -55,14 +55,11 @@ before_install:
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- if [[ `which conda` ]]; then echo 'Conda installation successful'; else exit 1; fi
- conda update --yes conda
- conda create -n testenv --yes python=$PYTHON_VERSION pip wheel nose
- source activate testenv
- conda install --yes gcc swig
- echo "Using GCC at "`which gcc`
- export CC=`which gcc`
# Fixes version `GLIBCXX_3.4.21' not found (on Ubuntu 16.04)
- conda install --yes libgcc

install:
# Install general requirements the way setup.py suggests
5 changes: 3 additions & 2 deletions autosklearn/__init__.py
@@ -6,10 +6,11 @@
__MANDATORY_PACKAGES__ = '''
numpy>=1.9
scikit-learn>=0.18.1,<0.19
-smac==0.5.0
lockfile>=0.10
+smac>=0.6.0,<0.7
+pyrfr>=0.6.1,<0.7
ConfigSpace>=0.3.3,<0.4
-pyrfr>=0.4.0,<0.5
+pyrfr>=0.6.0,<0.7
'''

dependencies.verify_packages(__MANDATORY_PACKAGES__)
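The bumped pins above are what `dependencies.verify_packages` enforces at import time. As a rough illustration of that kind of check — a minimal sketch using `pkg_resources`, not auto-sklearn's actual implementation:

```python
import pkg_resources

def check_pin(requirement_string):
    # Parse a pin such as 'smac>=0.6.0,<0.7' and compare it against
    # the version of the distribution that is actually installed.
    requirement = pkg_resources.Requirement.parse(requirement_string)
    installed = pkg_resources.get_distribution(requirement.project_name)
    if installed.version not in requirement:
        raise ImportError('found %s %s, but %s is required' % (
            requirement.project_name, installed.version, requirement_string))

check_pin('smac>=0.6.0,<0.7')
```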
10 changes: 8 additions & 2 deletions autosklearn/evaluation/__init__.py
@@ -72,8 +72,14 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,

eval_function = functools.partial(fit_predict_try_except_decorator,
                                  ta=eval_function)
-super().__init__(ta=eval_function, stats=stats, runhistory=runhistory,
-                 run_obj=run_obj, par_factor=par_factor)
+super().__init__(
+    ta=eval_function,
+    stats=stats,
+    runhistory=runhistory,
+    run_obj=run_obj,
+    par_factor=par_factor,
+    cost_for_crash=WORST_POSSIBLE_RESULT,
+)

self.backend = backend
self.autosklearn_seed = autosklearn_seed
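The new `cost_for_crash` argument, passed through alongside the SMAC 0.6 upgrade, is the cost SMAC books for a run that raised instead of returning. The `functools.partial` wrapping above serves the same goal. A hypothetical sketch of such a try/except decorator (the real `fit_predict_try_except_decorator` in autosklearn differs in detail):

```python
def fit_predict_try_except_decorator(ta, queue, **kwargs):
    # Hypothetical sketch: run the real evaluation function and turn any
    # exception into a reported crash instead of killing the worker.
    try:
        return ta(queue=queue, **kwargs)
    except Exception as e:
        # The crash travels back through the queue; SMAC then records the
        # run with the configured crash cost (WORST_POSSIBLE_RESULT above).
        queue.put({'status': 'CRASHED', 'additional_run_info': repr(e)})
```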
102 changes: 13 additions & 89 deletions autosklearn/smbo.py
@@ -450,42 +450,6 @@ def run_smbo(self):
(1, -1))
self.logger.info(list(meta_features_dict.keys()))

-# meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
-# meta_runs_index = 0
-# try:
-#     meta_durations = meta_base.get_all_runs('runtime')
-#     read_runtime_data = True
-# except KeyError:
-#     read_runtime_data = False
-#     self.logger.critical('Cannot read runtime data.')
-#     if self.acquisition_function == 'EIPS':
-#         self.logger.critical('Reverting to acquisition function EI!')
-#         self.acquisition_function = 'EI'
-
-# for meta_dataset in meta_runs.index:
-#     meta_dataset_start_index = meta_runs_index
-#     for meta_configuration in meta_runs.columns:
-#         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
-#             try:
-#                 config = meta_base.get_configuration_from_algorithm_index(
-#                     meta_configuration)
-#                 cost = meta_runs.loc[meta_dataset, meta_configuration]
-#                 if read_runtime_data:
-#                     runtime = meta_durations.loc[meta_dataset,
-#                                                  meta_configuration]
-#                 else:
-#                     runtime = 1
-#                 # TODO read out other status types!
-#                 meta_runhistory.add(config, cost, runtime,
-#                                     StatusType.SUCCESS,
-#                                     instance_id=meta_dataset)
-#                 meta_runs_index += 1
-#             except:
-#                 # TODO maybe add warning
-#                 pass
-#
-# meta_runs_dataset_indices[meta_dataset] = (
-#     meta_dataset_start_index, meta_runs_index)
else:
meta_features = None
self.logger.warning('Could not find meta-data directory %s' %
@@ -514,13 +478,13 @@
startup_time = self.watcher.wall_elapsed(self.dataset_name)
total_walltime_limit = self.total_walltime_limit - startup_time - 5
scenario_dict = {'cs': self.config_space,
-                'cutoff-time': self.func_eval_time_limit,
-                'memory-limit': self.memory_limit,
-                'wallclock-limit': total_walltime_limit,
+                'cutoff_time': self.func_eval_time_limit,
+                'memory_limit': self.memory_limit,
+                'wallclock_limit': total_walltime_limit,
                 'output-dir':
                     self.backend.get_smac_output_directory(self.seed),
                 'shared-model': self.shared_mode,
-                'run-obj': 'quality',
+                'run_obj': 'quality',
                 'deterministic': 'true',
                 'instances': instances}
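The renamed keys (`cutoff-time` → `cutoff_time`, and so on) match the argument names SMAC 0.6 expects in its `Scenario`. A self-contained sketch with placeholder limits (the values are illustrative, not auto-sklearn defaults):

```python
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.scenario.scenario import Scenario

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('alpha', 0.0, 1.0))

scenario = Scenario({
    'cs': cs,                  # configuration space to search
    'cutoff_time': 60,         # per-evaluation time limit in seconds
    'wallclock_limit': 3600,   # total optimization budget in seconds
    'memory_limit': 3072,      # per-evaluation memory limit in MB
    'run_obj': 'quality',      # optimize solution quality, not runtime
    'deterministic': 'true',
})
```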

@@ -631,31 +595,6 @@
else:
raise ValueError(self.configuration_mode)

-# Build a runtime model
-# runtime_rf = RandomForestWithInstances(types,
-#                                        instance_features=meta_features_list,
-#                                        seed=1, num_trees=10)
-# runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
-#                                      scenario=self.scenario,
-#                                      success_states=None,
-#                                      impute_censored_data=False,
-#                                      impute_state=None)
-# X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
-# runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
-# X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
-# # Transform Y_meta on a per-dataset base
-# for meta_dataset in meta_runs_dataset_indices:
-#     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
-#     end_index += 1  # Python indexing
-#     Y_meta[start_index:end_index, 0]\
-#         [Y_meta[start_index:end_index, 0] >2.0] = 2.0
-#     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
-#     Y_meta[start_index:end_index, 0] = 1 - (
-#         (1. - Y_meta[start_index:end_index, 0]) /
-#         (1. - dataset_minimum))
-#     Y_meta[start_index:end_index, 0]\
-#         [Y_meta[start_index:end_index, 0] > 2] = 2
-
smac.solver.stats.start_timing()
# == first, evaluate all metalearning and default configurations
smac.solver.incumbent = smac.solver.initial_design.run()
@@ -670,9 +609,10 @@
time_bound=self.total_walltime_limit)

if smac.solver.scenario.shared_model:
-    pSMAC.write(run_history=smac.solver.runhistory,
-                output_directory=smac.solver.scenario.output_dir,
-                num_run=self.seed)
+    pSMAC.write(
+        run_history=smac.solver.runhistory,
+        output_directory=smac.solver.scenario.output_dir,
+    )

if smac.solver.stats.is_budget_exhausted():
break
@@ -687,14 +627,7 @@
logger=self.logger)

choose_next_start_time = time.time()
-try:
-    challengers = self.choose_next(smac)
-except Exception as e:
-    self.logger.error(e)
-    self.logger.error("Error in getting next configurations "
-                      "with SMAC. Using random configuration!")
-    next_config = self.config_space.sample_configuration()
-    challengers = [next_config]
+challengers = self.choose_next(smac)
time_for_choose_next = time.time() - choose_next_start_time
self.logger.info('Used %g seconds to find next '
'configurations' % (time_for_choose_next))
@@ -708,9 +641,10 @@
time_bound=time_for_choose_next)

if smac.solver.scenario.shared_model:
-    pSMAC.write(run_history=smac.solver.runhistory,
-                output_directory=smac.solver.scenario.output_dir,
-                num_run=self.seed)
+    pSMAC.write(
+        run_history=smac.solver.runhistory,
+        output_directory=smac.solver.scenario.output_dir,
+    )

if smac.solver.stats.is_budget_exhausted():
break
@@ -737,18 +671,8 @@ def choose_next(self, smac):
(1. - dataset_minimum))
Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

-# if len(X_meta) > 0 and len(X_cfg) > 0:
-#     pass
-#     X_cfg = np.concatenate((X_meta, X_cfg))
-#     Y_cfg = np.concatenate((Y_meta, Y_cfg))
-# elif len(X_meta) > 0:
-#     X_cfg = X_meta.copy()
-#     Y_cfg = Y_meta.copy()
-# elif len(X_cfg) > 0:
X_cfg = X_cfg.copy()
Y_cfg = Y_cfg.copy()
-# else:
-#     raise ValueError('No training data for SMAC random forest!')

self.logger.info('Using %d training points for SMAC.' %
X_cfg.shape[0])
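For intuition, the rescaling in this hunk maps the best cost observed on the dataset to 0, leaves a cost of 1.0 unchanged, and clips everything above 2 — a small numeric illustration with made-up losses:

```python
import numpy as np

Y = np.array([0.10, 0.30, 0.50, 2.50])  # hypothetical validation errors
Y[Y > 2.0] = 2.0                        # clip extreme losses first
dataset_minimum = np.min(Y)             # best observed cost, here 0.10
Y = 1 - ((1.0 - Y) / (1.0 - dataset_minimum))
Y[Y > 2] = 2                            # clip again after rescaling
# Y is now [0.0, 0.2222..., 0.4444..., 2.0]
```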
3 changes: 2 additions & 1 deletion ci_scripts/circle_install.sh
@@ -6,6 +6,7 @@
# here.
source activate testenv

+export CC=`which gcc`
# install documentation building dependencies
pip install --upgrade numpy
pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc
@@ -16,4 +17,4 @@ python setup.py clean
python setup.py develop

# pipefail is necessary to propagate exit codes
-set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt
\ No newline at end of file
+set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt
3 changes: 2 additions & 1 deletion circle.yml
@@ -26,10 +26,11 @@ dependencies:
- sudo -E apt-get -yq remove texlive-binaries --purge
- sudo -E apt-get -yq update
- sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra
# Other stuff...
- sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential
# Conda installation
- wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
- bash ~/miniconda.sh -b -p $HOME/miniconda
- conda update --yes conda
- conda create -n testenv --yes python=3.6 pip wheel nose gcc swig

# The --user is needed to let sphinx see the source and the binaries
2 changes: 1 addition & 1 deletion doc/installation.rst
@@ -58,7 +58,7 @@ the Section `Installing auto-sklearn`_.
A common installation problem under recent Linux distribution is the
incompability of the compiler version used to compile the Python binary
shipped by AnaConda and the compiler installed by the distribution. This can
-be solved by istalling the *gcc* compiler shipped with AnaConda (as well as
+be solved by installing the *gcc* compiler shipped with AnaConda (as well as
*swig*):

.. code:: bash
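    # presumably the following, matching the gcc/swig conda packages
    # referenced in the surrounding text and in the .travis.yml above:
    conda install gcc swig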
4 changes: 2 additions & 2 deletions requirements.txt
@@ -18,5 +18,5 @@ pandas

ConfigSpace>=0.3.3,<0.4
pynisher>=0.4
-pyrfr>=0.4.0,<0.5
-smac==0.5.0
+pyrfr>=0.6.1,<0.7
+smac>=0.6.0,<0.7
7 changes: 6 additions & 1 deletion scripts/run_auto-sklearn_for_metadata_generation.py
@@ -94,7 +94,12 @@
config = entry.incumbent

logger = logging.getLogger('Testing:)')
-stats = Stats(Scenario({'cutoff_time': per_run_time_limit * 2}))
+stats = Stats(
+    Scenario({
+        'cutoff_time': per_run_time_limit * 2,
+        'run_obj': 'quality',
+    })
+)
stats.start_timing()
# To avoid the output "first run crashed"...
stats.ta_runs += 1
6 changes: 3 additions & 3 deletions setup.py
@@ -26,9 +26,9 @@
"liac-arff",
"pandas",
"ConfigSpace>=0.3.3,<0.4",
"pynisher>=0.4",
"pyrfr>=0.4,<0.5",
"smac==0.5.0"
"pynisher>=0.4,<0.5",
"pyrfr>=0.6.1,<0.7",
"smac>=0.6.0,<0.7"
]

with open("autosklearn/__version__.py") as fh:
24 changes: 15 additions & 9 deletions test/test_automl/test_smbo.py
@@ -28,18 +28,24 @@ def test_choose_next(self):
total_walltime_limit=total_walltime_limit,
memory_limit=memory_limit,
watcher=None,
-metric=accuracy)
+metric=accuracy
+)
auto.config_space = configspace
-scenario = Scenario({'cs': configspace,
-                     'cutoff-time': func_eval_time_limit,
-                     'wallclock-limit': total_walltime_limit,
-                     'memory-limit': memory_limit,
-                     'run-obj': 'quality'})
+scenario = Scenario({
+    'cs': configspace,
+    'cutoff_time': func_eval_time_limit,
+    'wallclock_limit': total_walltime_limit,
+    'memory_limit': memory_limit,
+    'run_obj': 'quality',
+})
smac = SMAC(scenario)

-self.assertRaisesRegex(ValueError, 'Cannot use SMBO algorithm on '
-                       'empty runhistory',
-                       auto.choose_next, smac)
+self.assertRaisesRegex(
+    ValueError,
+    'Cannot use SMBO algorithm on empty runhistory',
+    auto.choose_next,
+    smac
+)

config = Configuration(configspace, values={'a': 0.1, 'b': 0.2})
# TODO make sure the incumbent is always set?
20 changes: 13 additions & 7 deletions test/test_metalearning/pyMetaLearn/test_meta_features.py
@@ -1,13 +1,15 @@
-import os
+import tempfile
from six import StringIO
from unittest import TestCase
import unittest
+import os

import arff
import numpy as np
import scipy.sparse
from sklearn.preprocessing.imputation import Imputer
from sklearn.datasets import make_multilabel_classification
+from sklearn.externals.joblib import Memory

from autosklearn.pipeline.implementations.OneHotEncoder import OneHotEncoder
from sklearn.preprocessing import StandardScaler
@@ -81,12 +83,16 @@ def tearDown(self):
os.chdir(self.cwd)

def get_multilabel(self):
-return make_multilabel_classification(n_samples=100,
-                                      n_features=10,
-                                      n_classes=5,
-                                      n_labels=5,
-                                      return_indicator=True,
-                                      random_state=1)
+cache = Memory(cachedir=tempfile.gettempdir())
+cached_func = cache.cache(make_multilabel_classification)
+return cached_func(
+    n_samples=100,
+    n_features=10,
+    n_classes=5,
+    n_labels=5,
+    return_indicator=True,
+    random_state=1
+)

def test_number_of_instance(self):
mf = self.mf["NumberOfInstances"](self.X, self.y, self.categorical)
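The `Memory` wrapper added above memoizes the dataset construction on disk, so repeated test runs reload the generated multilabel data instead of regenerating it. The same pattern in isolation — a minimal sketch using sklearn's bundled joblib, as imported in this file:

```python
import tempfile
from sklearn.externals.joblib import Memory

memory = Memory(cachedir=tempfile.gettempdir(), verbose=0)

def expensive(n):
    # stands in for any costly, deterministic computation
    return [i ** 2 for i in range(n)]

cached_expensive = memory.cache(expensive)
cached_expensive(10)  # computed and written to the disk cache
cached_expensive(10)  # loaded from the cache on the second call
```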
@@ -16,6 +16,7 @@ class LibLinearComponentTest(BaseClassificationComponentTest):
res["default_iris_proba"] = 0.33728319465089696
res["default_iris_sparse"] = 0.56
res["default_digits"] = 0.91499696417729204
+res['default_digits_places'] = 2
res["default_digits_iterative"] = -1
res["default_digits_binary"] = 0.98907103825136611
res["default_digits_multilabel"] = 0.89539354612444322