Skip to content

Commit

Permalink
187 score samples (#192)
Browse files Browse the repository at this point in the history
* added score_samples

* added tests for score_samples and added case where score_samples returns an ndarray

* score_samples opens a new experiment if not fitted
  • Loading branch information
shania-m authored Mar 10, 2022
1 parent 221f260 commit ed92818
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 0 deletions.
33 changes: 33 additions & 0 deletions rubicon_ml/sklearn/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,39 @@ def score(self, X, y=None, sample_weight=None, experiment=None):
self.experiment = None
return score

def score_samples(self, X, experiment=None):
    """Score each sample with the final estimator and automatically
    log the results to Rubicon.

    Parameters
    ----------
    X : iterable
        Data to score. Must fulfill input requirements of first step of the pipeline.
    experiment : Experiment, optional
        The experiment to log the ``score_samples`` metric to. If None, the
        pipeline's current experiment is used; if the pipeline has no current
        experiment, a new one is logged to ``self.project`` using
        ``self.experiment_kwargs``.

    Returns
    -------
    The value returned by the parent pipeline's ``score_samples``. If the
    logger rejected that value with a ``TypeError``, the result of calling
    ``.tolist()`` on it is logged and returned instead.
    """
    sample_scores = super().score_samples(X)

    if experiment is not None:
        # An explicit experiment always takes precedence.
        self.experiment = experiment
    elif self.experiment is None:
        # No experiment is currently attached, so open a fresh one.
        self.experiment = self.project.log_experiment(**self.experiment_kwargs)

    try:
        logger = self.get_estimator_logger()
        try:
            logger.log_metric("score_samples", sample_scores)
        except TypeError:
            # The raw value (e.g. an ndarray) could not be logged as-is;
            # fall back to its plain-list form.
            sample_scores = sample_scores.tolist()
            logger.log_metric("score_samples", sample_scores)
    finally:
        # Always detach the experiment, even if logging failed, so the next
        # scoring call without an explicit experiment does not silently
        # reuse this one.
        self.experiment = None

    return sample_scores

def get_estimator_logger(self, step_name=None, estimator=None):
"""Get a logger for the estimator. By default, the logger will
have the current experiment set.
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/sklearn/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,3 +225,34 @@ def test_pipeline_memory_verbose(project_client, fake_estimator_cls):
pipeline = RubiconPipeline(project, steps, {"est", user_defined_logger})
assert pipeline.memory is None
assert pipeline.verbose is False


def test_score_samples(project_client, fake_estimator_cls):
    """Check that ``score_samples`` logs one metric per call and manages experiments.

    The first call after ``fit`` reuses the experiment opened by ``fit``; the
    second call opens a new experiment; the third logs to an explicitly
    supplied experiment.
    """
    project = project_client
    estimator = fake_estimator_cls()
    steps = [("est", estimator)]
    user_defined_logger = {"est": FilterEstimatorLogger(ignore_all=True)}
    pipeline = RubiconPipeline(project, steps, user_defined_logger)

    with patch.object(Pipeline, "fit", return_value=None):
        with patch.object(FilterEstimatorLogger, "log_parameters", return_value=None):
            pipeline.fit(["fake data"])
            assert len(project.experiments()) == 1

    with patch.object(Pipeline, "score_samples", return_value=None):
        with patch.object(EstimatorLogger, "log_metric", return_value=None) as mock_log_metric:
            pipeline.score_samples(["fake data"])
            pipeline.score_samples(["additional fake data"])
            experiment = project.log_experiment(name="fake experiment")
            pipeline.score_samples(["additional fake data"], experiment=experiment)

    # One metric logged per score_samples call.
    assert mock_log_metric.call_count == 3
    # fit -> 1 experiment, second score_samples -> 2, explicit log_experiment -> 3.
    assert len(project.experiments()) == 3
    assert project.experiments()[2].name == "fake experiment"

0 comments on commit ed92818

Please sign in to comment.