From fc1944ad6512839ee84bdf223745e59b77945a75 Mon Sep 17 00:00:00 2001
From: arnaudbore
Date: Tue, 29 Oct 2024 08:47:35 -0400
Subject: [PATCH 1/3] fix conversion and bundle metric extraction

---
 scripts/scil_bundle_shape_measures.py        | 132 ++++++++++---------
 scripts/scil_json_convert_entries_to_xlsx.py |  60 ++++-----
 2 files changed, 94 insertions(+), 98 deletions(-)

diff --git a/scripts/scil_bundle_shape_measures.py b/scripts/scil_bundle_shape_measures.py
index 66307d30e..adb5dedda 100755
--- a/scripts/scil_bundle_shape_measures.py
+++ b/scripts/scil_bundle_shape_measures.py
@@ -215,70 +215,74 @@ def main():
     pool.join()
 
     output_measures_dict = {}
-    for measure_dict in all_measures_dict:
-        # Empty bundle should not make the script crash
-        if measure_dict is not None:
-            for measure_name in measure_dict.keys():
-                # Create an empty list first
-                if measure_name not in output_measures_dict:
-                    output_measures_dict[measure_name] = []
-                output_measures_dict[measure_name].append(
-                    measure_dict[measure_name])
-    # add group stats if user wants
-    if args.group_statistics:
-        # length and span are weighted by streamline count
-        group_total_length = np.sum(
-            np.multiply(output_measures_dict['avg_length'],
-                        output_measures_dict['streamlines_count']))
-        group_total_span = np.sum(
-            np.multiply(output_measures_dict['span'],
-                        output_measures_dict['streamlines_count']))
-        group_streamlines_count = \
-            np.sum(output_measures_dict['streamlines_count'])
-        group_avg_length = group_total_length / group_streamlines_count
-        group_avg_span = group_total_span / group_streamlines_count
-        group_avg_vol = np.average(output_measures_dict['volume'])
-        group_avg_diam = \
-            2 * np.sqrt(group_avg_vol / (np.pi * group_avg_length))
-        output_measures_dict['group_stats'] = {}
-        output_measures_dict['group_stats']['total_streamlines_count'] = \
-            float(group_streamlines_count)
-        output_measures_dict['group_stats']['avg_streamline_length'] = \
-            group_avg_length
-        # max and min length of all streamlines in all input bundles
-        output_measures_dict['group_stats']['max_streamline_length'] = \
-            float(np.max(output_measures_dict['max_length']))
-        output_measures_dict['group_stats']['min_streamline_length'] = \
-            float(np.min(output_measures_dict['min_length']))
-        output_measures_dict['group_stats']['avg_streamline_span'] = \
-            group_avg_span
-        # computed with other set averages and not weighted by streamline count
-        output_measures_dict['group_stats']['avg_volume'] = group_avg_vol
-        output_measures_dict['group_stats']['avg_curl'] = \
-            group_avg_length / group_avg_span
-        output_measures_dict['group_stats']['avg_diameter'] = group_avg_diam
-        output_measures_dict['group_stats']['avg_elongation'] = \
-            group_avg_length / group_avg_diam
-        output_measures_dict['group_stats']['avg_surface_area'] = \
-            np.average(output_measures_dict['surface_area'])
-        output_measures_dict['group_stats']['avg_irreg'] = \
-            np.average(output_measures_dict['irregularity'])
-        output_measures_dict['group_stats']['avg_end_surface_area_head'] = \
-            np.average(output_measures_dict['end_surface_area_head'])
-        output_measures_dict['group_stats']['avg_end_surface_area_tail'] = \
-            np.average(output_measures_dict['end_surface_area_tail'])
-        output_measures_dict['group_stats']['avg_radius_head'] = \
-            np.average(output_measures_dict['radius_head'])
-        output_measures_dict['group_stats']['avg_radius_tail'] = \
-            np.average(output_measures_dict['radius_tail'])
-        output_measures_dict['group_stats']['avg_irregularity_head'] = \
-            np.average(
-                output_measures_dict['irregularity_of_end_surface_head'])
-        output_measures_dict['group_stats']['avg_irregularity_tail'] = \
-            np.average(
-                output_measures_dict['irregularity_of_end_surface_tail'])
-        output_measures_dict['group_stats']['avg_fractal_dimension'] = \
-            np.average(output_measures_dict['fractal_dimension'])
+    if len(args.in_bundles) == 1:
+        output_measures_dict = all_measures_dict[0]
+    else:
+        for measure_dict in all_measures_dict:
+            # Empty bundle should not make the script crash
+            if measure_dict is not None:
+                for measure_name in measure_dict.keys():
+                    # Create an empty list first
+                    if measure_name not in output_measures_dict:
+                        output_measures_dict[measure_name] = []
+                    output_measures_dict[measure_name].append(
+                        measure_dict[measure_name])
+        # add group stats if user wants
+        if args.group_statistics:
+            # length and span are weighted by streamline count
+            group_total_length = np.sum(
+                np.multiply(output_measures_dict['avg_length'],
+                            output_measures_dict['streamlines_count']))
+            group_total_span = np.sum(
+                np.multiply(output_measures_dict['span'],
+                            output_measures_dict['streamlines_count']))
+            group_streamlines_count = \
+                np.sum(output_measures_dict['streamlines_count'])
+            group_avg_length = group_total_length / group_streamlines_count
+            group_avg_span = group_total_span / group_streamlines_count
+            group_avg_vol = np.average(output_measures_dict['volume'])
+            group_avg_diam = \
+                2 * np.sqrt(group_avg_vol / (np.pi * group_avg_length))
+            output_measures_dict['group_stats'] = {}
+            output_measures_dict['group_stats']['total_streamlines_count'] = \
+                float(group_streamlines_count)
+            output_measures_dict['group_stats']['avg_streamline_length'] = \
+                group_avg_length
+            # max and min length of all streamlines in all input bundles
+            output_measures_dict['group_stats']['max_streamline_length'] = \
+                float(np.max(output_measures_dict['max_length']))
+            output_measures_dict['group_stats']['min_streamline_length'] = \
+                float(np.min(output_measures_dict['min_length']))
+            output_measures_dict['group_stats']['avg_streamline_span'] = \
+                group_avg_span
+            # computed with other set averages and not weighted by
+            # streamline count
+            output_measures_dict['group_stats']['avg_volume'] = group_avg_vol
+            output_measures_dict['group_stats']['avg_curl'] = \
+                group_avg_length / group_avg_span
+            output_measures_dict['group_stats']['avg_diameter'] = group_avg_diam
+            output_measures_dict['group_stats']['avg_elongation'] = \
+                group_avg_length / group_avg_diam
+            output_measures_dict['group_stats']['avg_surface_area'] = \
+                np.average(output_measures_dict['surface_area'])
+            output_measures_dict['group_stats']['avg_irreg'] = \
+                np.average(output_measures_dict['irregularity'])
+            output_measures_dict['group_stats']['avg_end_surface_area_head'] = \
+                np.average(output_measures_dict['end_surface_area_head'])
+            output_measures_dict['group_stats']['avg_end_surface_area_tail'] = \
+                np.average(output_measures_dict['end_surface_area_tail'])
+            output_measures_dict['group_stats']['avg_radius_head'] = \
+                np.average(output_measures_dict['radius_head'])
+            output_measures_dict['group_stats']['avg_radius_tail'] = \
+                np.average(output_measures_dict['radius_tail'])
+            output_measures_dict['group_stats']['avg_irregularity_head'] = \
+                np.average(
+                    output_measures_dict['irregularity_of_end_surface_head'])
+            output_measures_dict['group_stats']['avg_irregularity_tail'] = \
+                np.average(
+                    output_measures_dict['irregularity_of_end_surface_tail'])
+            output_measures_dict['group_stats']['avg_fractal_dimension'] = \
+                np.average(output_measures_dict['fractal_dimension'])
 
     if args.out_json:
         with open(args.out_json, 'w') as outfile:
diff --git a/scripts/scil_json_convert_entries_to_xlsx.py b/scripts/scil_json_convert_entries_to_xlsx.py
index 8d38a2e55..394b725bd 100755
--- a/scripts/scil_json_convert_entries_to_xlsx.py
+++ b/scripts/scil_json_convert_entries_to_xlsx.py
@@ -18,6 +18,8 @@
 from scilpy.io.utils import (add_overwrite_arg, add_verbose_arg,
                              assert_inputs_exist, assert_outputs_exist)
 
+dps_dpp = ['data_per_streamline_keys', 'data_per_point_keys']
+
 
 def _get_all_bundle_names(stats):
     bnames = set()
@@ -75,15 +77,11 @@ def _get_stats_parse_function(stats, stats_over_population):
     if len(first_bundle_stats.keys()) == 1 and\
             _are_all_elements_scalars(first_bundle_stats):
         return _parse_scalar_stats
-    elif len(first_bundle_stats.keys()) == 4 and \
+    if len(first_bundle_stats.keys()) == 4 and \
         set(first_bundle_stats.keys()) == \
             set(['lesion_total_vol', 'lesion_avg_vol', 'lesion_std_vol',
                  'lesion_count']):
         return _parse_lesion
-    elif len(first_bundle_stats.keys()) == 4 and \
-        set(first_bundle_stats.keys()) == \
-            set(['min_length', 'max_length', 'mean_length', 'std_length']):
-        return _parse_lengths
     elif type(first_bundle_substat) is dict:
         sub_keys = list(first_bundle_substat.keys())
         if set(sub_keys) == set(['mean', 'std']):
@@ -95,6 +93,8 @@ def _get_stats_parse_function(stats, stats_over_population):
             return _parse_per_point_meanstd
         elif _are_all_elements_scalars(first_bundle_substat):
             return _parse_per_label_scalar
+        else:
+            return _parse_stats
 
     raise IOError('Unable to recognize stats type!')
 
@@ -201,39 +201,31 @@ def _parse_scalar_lesions(stats, subs, bundles):
     return dataframes, df_names
 
 
-def _parse_lengths(stats, subs, bundles):
+def _parse_stats(stats, subs, bundles):
     nb_subs = len(subs)
     nb_bundles = len(bundles)
 
-    min_lengths = np.full((nb_subs, nb_bundles), np.NaN)
-    max_lengths = np.full((nb_subs, nb_bundles), np.NaN)
-    mean_lengths = np.full((nb_subs, nb_bundles), np.NaN)
-    std_lengths = np.full((nb_subs, nb_bundles), np.NaN)
-
-    for sub_id, sub_name in enumerate(subs):
-        for bundle_id, bundle_name in enumerate(bundles):
-            b_stat = stats[sub_name].get(bundle_name)
-
-            if b_stat is not None:
-                min_lengths[sub_id, bundle_id] = b_stat['min_length']
-                max_lengths[sub_id, bundle_id] = b_stat['max_length']
-                mean_lengths[sub_id, bundle_id] = b_stat['mean_length']
-                std_lengths[sub_id, bundle_id] = b_stat['std_length']
-
-    dataframes = [pd.DataFrame(data=min_lengths,
-                               index=subs,
-                               columns=bundles),
-                  pd.DataFrame(data=max_lengths,
-                               index=subs,
-                               columns=bundles),
-                  pd.DataFrame(data=mean_lengths,
-                               index=subs,
-                               columns=bundles),
-                  pd.DataFrame(data=std_lengths,
-                               index=subs,
-                               columns=bundles)]
+    dataframes = []
 
-    df_names = ["min_length", "max_length", "mean_length", "std_length"]
+    # Check all metrics keys
+    metrics_keys = stats[subs[0]][bundles[0]].keys()
+    df_names = list(metrics_keys)
+
+    for metric_name in metrics_keys:
+        if metric_name in dps_dpp:
+            df_names.remove(metric_name)
+        else:
+            curr_metric = np.full((nb_subs, nb_bundles), np.NaN)
+            for bundle_id, bundle_name in enumerate(bundles):
+                for sub_id, sub_name in enumerate(subs):
+                    b_stat = stats[sub_name][bundle_name].get(metric_name)
+                    if b_stat is not None and metric_name not in dps_dpp:
+                        curr_metric[sub_id, bundle_id] = b_stat
+
+            dataframes.append(pd.DataFrame(data=curr_metric,
+                                           index=subs,
+                                           columns=bundles))
+            del curr_metric
 
     return dataframes, df_names

From 3992ed8d2e4573869cea5c35bd47ad55bff03d08 Mon Sep 17 00:00:00 2001
From: arnaudbore
Date: Mon, 4 Nov 2024 14:29:16 -0500
Subject: [PATCH 2/3] answer francois comments

---
 scripts/scil_bundle_shape_measures.py        | 23 +++++++-------------
 scripts/scil_json_convert_entries_to_xlsx.py | 12 +++++-----
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/scripts/scil_bundle_shape_measures.py b/scripts/scil_bundle_shape_measures.py
index adb5dedda..ef6669a11 100755
--- a/scripts/scil_bundle_shape_measures.py
+++ b/scripts/scil_bundle_shape_measures.py
@@ -263,26 +263,19 @@ def main():
             output_measures_dict['group_stats']['avg_diameter'] = group_avg_diam
             output_measures_dict['group_stats']['avg_elongation'] = \
                 group_avg_length / group_avg_diam
-            output_measures_dict['group_stats']['avg_surface_area'] = \
-                np.average(output_measures_dict['surface_area'])
-            output_measures_dict['group_stats']['avg_irreg'] = \
-                np.average(output_measures_dict['irregularity'])
-            output_measures_dict['group_stats']['avg_end_surface_area_head'] = \
-                np.average(output_measures_dict['end_surface_area_head'])
-            output_measures_dict['group_stats']['avg_end_surface_area_tail'] = \
-                np.average(output_measures_dict['end_surface_area_tail'])
-            output_measures_dict['group_stats']['avg_radius_head'] = \
-                np.average(output_measures_dict['radius_head'])
-            output_measures_dict['group_stats']['avg_radius_tail'] = \
-                np.average(output_measures_dict['radius_tail'])
             output_measures_dict['group_stats']['avg_irregularity_head'] = \
                 np.average(
                     output_measures_dict['irregularity_of_end_surface_head'])
             output_measures_dict['group_stats']['avg_irregularity_tail'] = \
                 np.average(
-                    output_measures_dict['irregularity_of_end_surface_tail'])
-            output_measures_dict['group_stats']['avg_fractal_dimension'] = \
-                np.average(output_measures_dict['fractal_dimension'])
+                output_measures_dict['irregularity_of_end_surface_tail'])
+
+            list_metrics = ['surface_area', 'irregularity', 'end_surface_area_head',
+                            'end_surface_area_tail', 'radius_head', 'radius_tail',
+                            'fractal_dimension']
+            for curr_metric in list_metrics:
+                output_measures_dict['group_stats']['avg_' + curr_metric] = \
+                    np.average(output_measures_dict[curr_metric])
 
     if args.out_json:
         with open(args.out_json, 'w') as outfile:
diff --git a/scripts/scil_json_convert_entries_to_xlsx.py b/scripts/scil_json_convert_entries_to_xlsx.py
index 394b725bd..dfdfc228b 100755
--- a/scripts/scil_json_convert_entries_to_xlsx.py
+++ b/scripts/scil_json_convert_entries_to_xlsx.py
@@ -75,16 +75,16 @@ def _get_stats_parse_function(stats, stats_over_population):
         first_bundle_stats.keys())[0]]
 
     if len(first_bundle_stats.keys()) == 1 and\
-            _are_all_elements_scalars(first_bundle_stats):
+            _are_all_elements_scalars(first_bundle_stats):  # when you have only one key per bundle
         return _parse_scalar_stats
     if len(first_bundle_stats.keys()) == 4 and \
         set(first_bundle_stats.keys()) == \
             set(['lesion_total_vol', 'lesion_avg_vol', 'lesion_std_vol',
-                 'lesion_count']):
+                 'lesion_count']): # when you have lesion stats
         return _parse_lesion
     elif type(first_bundle_substat) is dict:
         sub_keys = list(first_bundle_substat.keys())
-        if set(sub_keys) == set(['mean', 'std']):
+        if set(sub_keys) == set(['mean', 'std']):  # when you have mean and std per stats
             if stats_over_population:
                 return _parse_per_label_population_stats
             else:
@@ -93,7 +93,7 @@ def _get_stats_parse_function(stats, stats_over_population):
             return _parse_per_point_meanstd
         elif _are_all_elements_scalars(first_bundle_substat):
             return _parse_per_label_scalar
-        else:
+        else:  # when you have multiple metrics per bundle
            return _parse_stats
 
     raise IOError('Unable to recognize stats type!')
@@ -212,14 +212,14 @@ def _parse_stats(stats, subs, bundles):
     df_names = list(metrics_keys)
 
     for metric_name in metrics_keys:
-        if metric_name in dps_dpp:
+        if metric_name in dps_dpp:  # remove dps and dpp keys
            df_names.remove(metric_name)
         else:
            curr_metric = np.full((nb_subs, nb_bundles), np.NaN)
            for bundle_id, bundle_name in enumerate(bundles):
                for sub_id, sub_name in enumerate(subs):
                    b_stat = stats[sub_name][bundle_name].get(metric_name)
-                    if b_stat is not None and metric_name not in dps_dpp:
+                    if b_stat is not None:
                        curr_metric[sub_id, bundle_id] = b_stat
 
            dataframes.append(pd.DataFrame(data=curr_metric,

From 2a067337e3fde73d241f456b5896824d1cd39ee8 Mon Sep 17 00:00:00 2001
From: arnaudbore
Date: Mon, 4 Nov 2024 14:46:55 -0500
Subject: [PATCH 3/3] fix pep8

---
 scripts/scil_bundle_shape_measures.py        | 9 +++++----
 scripts/scil_json_convert_entries_to_xlsx.py | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/scripts/scil_bundle_shape_measures.py b/scripts/scil_bundle_shape_measures.py
index ef6669a11..059aca521 100755
--- a/scripts/scil_bundle_shape_measures.py
+++ b/scripts/scil_bundle_shape_measures.py
@@ -268,11 +268,12 @@ def main():
                     output_measures_dict['irregularity_of_end_surface_head'])
             output_measures_dict['group_stats']['avg_irregularity_tail'] = \
                 np.average(
-                output_measures_dict['irregularity_of_end_surface_tail'])
+                    output_measures_dict['irregularity_of_end_surface_tail'])
 
-            list_metrics = ['surface_area', 'irregularity', 'end_surface_area_head',
-                            'end_surface_area_tail', 'radius_head', 'radius_tail',
-                            'fractal_dimension']
+            list_metrics = ['surface_area', 'irregularity',
+                            'end_surface_area_head',
+                            'end_surface_area_tail', 'radius_head',
+                            'radius_tail', 'fractal_dimension']
             for curr_metric in list_metrics:
                 output_measures_dict['group_stats']['avg_' + curr_metric] = \
                     np.average(output_measures_dict[curr_metric])
diff --git a/scripts/scil_json_convert_entries_to_xlsx.py b/scripts/scil_json_convert_entries_to_xlsx.py
index dfdfc228b..bf5746278 100755
--- a/scripts/scil_json_convert_entries_to_xlsx.py
+++ b/scripts/scil_json_convert_entries_to_xlsx.py
@@ -80,7 +80,7 @@ def _get_stats_parse_function(stats, stats_over_population):
     if len(first_bundle_stats.keys()) == 4 and \
         set(first_bundle_stats.keys()) == \
             set(['lesion_total_vol', 'lesion_avg_vol', 'lesion_std_vol',
-                 'lesion_count']): # when you have lesion stats
+                 'lesion_count']):  # when you have lesion stats
         return _parse_lesion
     elif type(first_bundle_substat) is dict:
         sub_keys = list(first_bundle_substat.keys())