187 score samples (#192)

* added score_samples * added tests for score_samples and added case where score_samples returns an ndarray * score_samples opens a new experiment if not fitted
capitalone · Mar 10, 2022 · ed92818 · ed92818
1 parent 221f260
commit ed92818
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 0 deletions.
diff --git a/rubicon_ml/sklearn/pipeline.py b/rubicon_ml/sklearn/pipeline.py
@@ -143,6 +143,39 @@ def score(self, X, y=None, sample_weight=None, experiment=None):
         self.experiment = None
         return score
 
+    def score_samples(self, X, experiment=None):
+        """Score with the final estimator and automatically
+        log the results to Rubicon.
+
+        Parameters
+        ----------
+        X : iterable
+            Data to predict on. Must fulfill input requirements of first step of the pipeline.
+        y : iterable, optional
+            Targets used for scoring. Must fulfill label requirements for all steps of the pipeline.
+        sample_weight : list, optional
+            If not None, this argument is passed as sample_weight keyword argument to the
+            score method of the final estimator.
+        """
+        score_samples = super().score_samples(X)
+
+        if experiment is not None:
+            # fitted
+            self.experiment = experiment
+        elif self.experiment is None:
+            # not fitted
+            self.experiment = self.project.log_experiment(**self.experiment_kwargs)
+
+        logger = self.get_estimator_logger()
+        try:
+            logger.log_metric("score_samples", score_samples)
+        except TypeError:
+            score_samples = score_samples.tolist()
+            logger.log_metric("score_samples", score_samples)
+        # clear self.experiment and its not set for when a score is called
+        self.experiment = None
+        return score_samples
+
     def get_estimator_logger(self, step_name=None, estimator=None):
         """Get a logger for the estimator. By default, the logger will
         have the current experiment set.

diff --git a/tests/unit/sklearn/test_pipeline.py b/tests/unit/sklearn/test_pipeline.py
@@ -225,3 +225,34 @@ def test_pipeline_memory_verbose(project_client, fake_estimator_cls):
     pipeline = RubiconPipeline(project, steps, {"est", user_defined_logger})
     assert pipeline.memory is None
     assert pipeline.verbose is False
+
+
+def test_score_samples(project_client, fake_estimator_cls):
+    project = project_client
+    estimator = fake_estimator_cls()
+    steps = [("est", estimator)]
+    user_defined_logger = {"est": FilterEstimatorLogger(ignore_all=True)}
+
+    pipeline = RubiconPipeline(project, steps, user_defined_logger)
+
+    project = project_client
+    estimator = fake_estimator_cls()
+    steps = [("est", estimator)]
+    user_defined_logger = {"est": FilterEstimatorLogger(ignore_all=True)}
+    pipeline = RubiconPipeline(project, steps, user_defined_logger)
+
+    with patch.object(Pipeline, "fit", return_value=None):
+        with patch.object(FilterEstimatorLogger, "log_parameters", return_value=None):
+            pipeline.fit(["fake data"])
+    assert len(project.experiments()) == 1
+
+    with patch.object(Pipeline, "score_samples", return_value=None):
+        with patch.object(EstimatorLogger, "log_metric", return_value=None) as mock_log_metric:
+            pipeline.score_samples(["fake data"])
+            pipeline.score_samples(["additional fake data"])
+            experiment = project.log_experiment(name="fake experiment")
+            pipeline.score_samples(["additional fake data"], experiment=experiment)
+
+    assert mock_log_metric._mock_call_count == 3
+    assert len(project.experiments()) == 3
+    assert project.experiments()[2].name == "fake experiment"