From c27678da672a717c6adeaeaf3bc1c618acd52bca Mon Sep 17 00:00:00 2001
From: tommoral
Date: Wed, 13 Dec 2023 19:37:15 +0100
Subject: [PATCH 1/3] FIX bad evaluation of the prediction

---
 stroke/scoring.py | 37 +++++++++++--------------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/stroke/scoring.py b/stroke/scoring.py
index 139a481..a7f476e 100644
--- a/stroke/scoring.py
+++ b/stroke/scoring.py
@@ -43,34 +43,21 @@ def score_function(self, Y_true: np.array, Y_pred: np.array):
             Sørensen–Dice coefficient.
         """
         y_true = np.array(Y_true.y_true)
+        assert len(y_true) == len(Y_pred.y_pred)
         if len(Y_pred.y_pred) == 0:
             return 0
         estimator = Y_pred.y_pred[0].estimator
         fscore = 0
 
-        # Load example to ensure that the size fits
-        dat = estimator.predict(BIDSLoader.load_image_tuple(Y_pred.y_pred[0].pred))
-        # Have to unpack if y_true is bool
-        # Using proxy of y_true.shape != y_pred.shape to indicate that data needs to be unpacked
-        must_unpack = y_true[0, ...].shape != dat.shape
-
-        for idx, prediction_object in enumerate(Y_pred.y_pred):
-            # First sample is already loaded; let's not waste the loading.
-            if idx != 0:
-                dat = BIDSLoader.load_image_tuple(prediction_object.pred)
-
-            # Note: If you want to get the weighted mean, use
-            # self.calc_score_parts
-            if must_unpack:
-                unpacked_y_sample = np.array(
-                    self.unpack_data(y_true[idx, ...], dat.shape), dtype=dat.dtype
+        for y_true_i, prediction_object in zip(y_true, Y_pred.y_pred):
+            dat = estimator.predict(BIDSLoader.load_image_tuple(prediction_object.pred))
+
+            # Using proxy of y_true.shape != y_pred.shape to indicate that data needs to be unpacked
+            if y_true_i.shape != dat.shape:
+                y_true_i = np.array(
+                    self.unpack_data(y_true_i, dat.shape), dtype=dat.dtype
                 )
-                # unpacked_y_sample = np.array(np.unpackbits(y_true[idx, ...]), dtype=dat.dtype)
-                unpacked_y_sample = unpacked_y_sample.reshape(dat.shape)
-                sd_score = self.calc_score(dat, unpacked_y_sample)
-            else:
-                sd_score = self.calc_score(dat, y_true[idx, ...])
-            fscore += sd_score
+            fscore += self.calc_score(dat, y_true_i)
 
         # Return the mean score
         return fscore / (idx + 1)
@@ -132,10 +119,8 @@ def calc_score_parts(array_0: np.array, array_1: np.array):
         tuple
             Tuple containing (overlap, sum(array_0), sum(array_1)
         """
-        array_0_reshape = np.reshape(array_0, (1, np.prod(array_0.shape)))
-        array_1_reshape = np.reshape(array_1, (np.prod((array_1.shape)), 1))
-        overlap = 2 * array_0_reshape @ array_1_reshape
-        return (overlap[0][0], np.sum(array_0), np.sum(array_1))
+        overlap = 2 * array_0.ravel() @ array_1.ravel()
+        return (overlap, np.sum(array_0), np.sum(array_1))
 
     @staticmethod
     def check_y_pred_dimensions(array_0: np.array, array_1: np.array):

From ae84cdc9f7a526313fef7b529e883899c0d99543 Mon Sep 17 00:00:00 2001
From: Thomas Moreau
Date: Wed, 13 Dec 2023 22:41:21 +0100
Subject: [PATCH 2/3] Update stroke/scoring.py

---
 stroke/scoring.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stroke/scoring.py b/stroke/scoring.py
index a7f476e..a6014a3 100644
--- a/stroke/scoring.py
+++ b/stroke/scoring.py
@@ -60,7 +60,7 @@ def score_function(self, Y_true: np.array, Y_pred: np.array):
             fscore += self.calc_score(dat, y_true_i)
 
         # Return the mean score
-        return fscore / (idx + 1)
+        return fscore / len(y_true)
 
     @staticmethod
     def unpack_data(array_0: np.array, output_shape: np.array):

From 987bd045429acdd56349dec27ed899e2e981ae89 Mon Sep 17 00:00:00 2001
From: tommoral
Date: Thu, 14 Dec 2023 00:42:36 +0100
Subject: [PATCH 3/3] FIX unpack labels in training loop

---
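Note for reviewers: score_function already expands packed labels with
unpack_data before comparing them to predictions, but the training loop was
still fitting the estimator on the raw targets. This patch applies the same
expansion to each target before fit_partial, reusing DiceCoeff.unpack_data.
A rough sketch of the intended behaviour for one sample (the bit-packing
below and the internals of DiceCoeff.unpack_data are assumptions, not code
from this repository):

    import numpy as np

    # Toy 2x2 boolean mask standing in for one target volume.
    mask = np.array([[1, 0], [0, 1]], dtype=bool)
    packed = np.packbits(mask)  # stored compactly: shape (1,), dtype uint8

    # What the unpacking step is expected to recover, given the image shape.
    unpacked = np.unpackbits(packed)[:mask.size].reshape(mask.shape)
    assert (unpacked.astype(bool) == mask).all()
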
 stroke/bids_workflow.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/stroke/bids_workflow.py b/stroke/bids_workflow.py
index 380a11d..200f63b 100644
--- a/stroke/bids_workflow.py
+++ b/stroke/bids_workflow.py
@@ -1,6 +1,8 @@
 import os
+import numpy as np
 from rampwf.utils.importing import import_module_from_source
 from stroke import stroke_config
+from stroke.scoring import DiceCoeff
 from stroke.bids_loader import BIDSLoader
 
 
@@ -61,13 +63,16 @@ def train_submission(
 
         for idx in range(0, len(train_is), batch_size):
             # Get tuples to load
-            data_to_load = [X_array[i] for i in train_is[idx : idx + batch_size]]
-            target_to_load = [y_array[i] for i in train_is[idx : idx + batch_size]]
+            data_to_load = [X_array[i] for i in train_is[idx:idx + batch_size]]
+            target_to_load = [y_array[i] for i in train_is[idx:idx + batch_size]]
             # Load data
             data = BIDSLoader.load_image_tuple_list(data_to_load)
             target = BIDSLoader.load_image_tuple_list(
                 target_to_load, dtype=stroke_config.data_types["target"]
            )
+            target = np.array([
+                DiceCoeff.unpack_data(y, X.shape) for y, X in zip(target, data)
+            ])
 
             # Fit
             self.estimator.fit_partial(data, target)
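
Series note: with patches 1 and 2, each prediction is now scored against its
own unpacked ground truth, the mean is taken over len(y_true) rather than a
loop index, and calc_score_parts computes the overlap with a flat dot product
instead of reshaping to row/column matrices. A small sanity check of that
simplification on toy binary masks (illustrative only; the toy arrays and the
final division are not taken from the repository, since calc_score's body is
not shown in this diff):

    import numpy as np

    a = np.array([[1, 1], [0, 0]], dtype=float)
    b = np.array([[1, 0], [1, 0]], dtype=float)

    overlap = 2 * a.ravel() @ b.ravel()   # twice the intersection: 2.0
    dice = overlap / (a.sum() + b.sum())  # Sørensen–Dice: 2 / (2 + 2) = 0.5
    assert dice == 0.5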