Commit 99a0072

Update agg_stat_bootstrap.py
fix safe_log.logger to safe_log()
bikegeek authored Jan 2, 2025
1 parent be32b14 commit 99a0072
Showing 1 changed file with 52 additions and 52 deletions.
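
The change is mechanical: every logging call spelled safe_log.logger(...) is rewritten to call safe_log(...) directly, which suggests safe_log is a plain module-level function rather than an object carrying a logger attribute. A minimal sketch of such a helper, under that assumption (the actual METcalcpy utility may differ), is:

def safe_log(logger, log_method, message):
    # Hypothetical sketch of the helper this commit targets.
    # logger     -- a logging.Logger instance, or None when logging is disabled
    # log_method -- name of the logging method to call, e.g. "debug", "info"
    # message    -- the text to record
    if logger is not None:
        getattr(logger, log_method)(message)

With that signature, the corrected call sites below, e.g. safe_log(logger, "debug", "Initializing AggStatBootstrap with parameters."), dispatch to logger.debug(...) only when a logger was set up.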
104 changes: 52 additions & 52 deletions metcalcpy/agg_stat_bootstrap.py
@@ -71,7 +71,7 @@ def __init__(self, in_params):
"""
self.logger = setup_logging(in_params)
logger = self.logger
safe_log.logger(logger, "debug", "Initializing AggStatBootstrap with parameters.")
safe_log(logger, "debug", "Initializing AggStatBootstrap with parameters.")
self.statistic = None
self.derived_name_to_values = {}
self.params = in_params
@@ -94,53 +94,53 @@ def _init_out_frame(self, series_fields, series):
pandas data frame
"""
logger = self.logger
safe_log.logger(logger, "debug", "Initializing output data frame.")
safe_log(logger, "debug", "Initializing output data frame.")
result = pd.DataFrame()
row_number = len(series)
safe_log.logger(logger, "debug", f"Number of rows to initialize: {row_number}")
safe_log(logger, "debug", f"Number of rows to initialize: {row_number}")
# fill series variables and values
for field_ind, field in enumerate(series_fields):
result[field] = [row[field_ind] for row in series]
safe_log.logger(logger, "debug", f"Field '{field}' initialized with {len(result[field])} entries.")
safe_log(logger, "debug", f"Field '{field}' initialized with {len(result[field])} entries.")
# fill the stats and CI values placeholders with None
result['fcst_var'] = [None] * row_number
result['stat_value'] = [None] * row_number
result['stat_btcl'] = [None] * row_number
result['stat_btcu'] = [None] * row_number
result['nstats'] = [None] * row_number

safe_log.logger(logger, "debug", "Stats and confidence interval placeholders added.")
safe_log.logger(logger, "debug", f"DataFrame initialized with columns: {result.columns.tolist()}")
safe_log(logger, "debug", "Stats and confidence interval placeholders added.")
safe_log(logger, "debug", f"DataFrame initialized with columns: {result.columns.tolist()}")

return result

def _proceed_with_axis(self, axis="1"):

logger = self.logger
safe_log.logger(logger, "info", f"Proceeding with axis: {axis}")
safe_log(logger, "info", f"Proceeding with axis: {axis}")
if not self.input_data.empty:
# identify all possible points values by adding series values, indy values
# and statistics and then permute them
safe_log.logger(logger, "debug", "Input data is not empty. Proceeding with calculations.")
safe_log(logger, "debug", "Input data is not empty. Proceeding with calculations.")
indy_vals = self.params['indy_vals']
series_val = self.params['series_val_' + axis]
all_fields_values = series_val.copy()
all_fields_values[self.params['indy_var']] = indy_vals
all_fields_values['stat_name'] = self.params['list_stat_' + axis]
all_points = list(itertools.product(*all_fields_values.values()))
safe_log.logger(logger, "debug", f"All points generated: {len(all_points)} points created for axis {axis}.")
safe_log(logger, "debug", f"All points generated: {len(all_points)} points created for axis {axis}.")
fcst_var = None
if len(self.params['fcst_var_val_' + axis]) > 0 and 'fcst_var' in self.input_data.columns:
fcst_var = list(self.params['fcst_var_val_' + axis].keys())[0]
safe_log.logger(logger, "debug", f"Forecast variable identified: {fcst_var}")
safe_log(logger, "debug", f"Forecast variable identified: {fcst_var}")
cases = []
out_frame = self._init_out_frame(all_fields_values.keys(), all_points)
safe_log.logger(logger, "debug", f"Output DataFrame initialized with {len(out_frame)} rows.")
safe_log(logger, "debug", f"Output DataFrame initialized with {len(out_frame)} rows.")
point_to_distrib = {}

# run the bootstrap flow for each independent variable value
for indy_val in indy_vals:
safe_log.logger(logger, "debug", f"Processing independent value: {indy_val}")
safe_log(logger, "debug", f"Processing independent value: {indy_val}")
# extract the records for the current indy value
if is_string_integer(indy_val):
filtered_by_indy_data = \
@@ -155,7 +155,7 @@ def _proceed_with_axis(self, axis="1"):
all_fields_values = series_val.copy()

all_points = list(itertools.product(*all_fields_values.values()))
safe_log.logger(logger, "debug", f"Number of points for independent value '{indy_val}': {len(all_points)}.")
safe_log(logger, "debug", f"Number of points for independent value '{indy_val}': {len(all_points)}.")

for point in all_points:
all_filters = []
@@ -182,7 +182,7 @@ def _proceed_with_axis(self, axis="1"):
# use numpy to select the rows where any record evaluates to True
mask = np.array(all_filters).all(axis=0)
point_data = filtered_by_indy_data.loc[mask]
safe_log.logger(logger, "debug", f"Point data filtered for point {point}. Number of records: {len(point_data)}")
safe_log(logger, "debug", f"Point data filtered for point {point}. Number of records: {len(point_data)}")

# build a list of cases to sample
fcst_valid = point_data.loc[:, 'fcst_valid'].astype(str)
@@ -193,7 +193,7 @@ def _proceed_with_axis(self, axis="1"):
# calculate bootstrap for cases
for stat_upper in self.params['list_stat_' + axis]:
self.statistic = stat_upper.lower()
safe_log.logger(logger, "debug", f"Calculating bootstrap for statistic: {self.statistic}")
safe_log(logger, "debug", f"Calculating bootstrap for statistic: {self.statistic}")
for point in all_points:
all_filters = []
out_frame_filter = []
@@ -218,7 +218,7 @@ def _proceed_with_axis(self, axis="1"):
mask_out_frame = np.array(out_frame_filter).all(axis=0)
point_data = filtered_by_indy_data.loc[mask]
bootstrap_results = self._get_bootstrapped_stats(point_data, cases)
safe_log.logger(logger, "debug", f"Bootstrap results calculated for point {point}: {bootstrap_results.value}")
safe_log(logger, "debug", f"Bootstrap results calculated for point {point}: {bootstrap_results.value}")
# save bootstrap results
point_to_distrib[point] = bootstrap_results
n_stats = len(point_data)
@@ -235,32 +235,32 @@ def _proceed_with_axis(self, axis="1"):
out_frame.loc[index, 'stat_btcl'] = bootstrap_results.lower_bound
out_frame.loc[index, 'stat_btcu'] = bootstrap_results.upper_bound
out_frame.loc[index, 'nstats'] = n_stats
safe_log.logger(logger, "debug", f"Results saved to output DataFrame at index {index} for point {point}.")
safe_log(logger, "debug", f"Results saved to output DataFrame at index {index} for point {point}.")
else:
out_frame = pd.DataFrame()
safe_log.logger(logger, "warning", "Input data is empty. Returning an empty DataFrame.")
safe_log(logger, "warning", "Input data is empty. Returning an empty DataFrame.")

safe_log.logger(logger, "info", f"Completed processing for axis: {axis}")
safe_log(logger, "info", f"Completed processing for axis: {axis}")
return out_frame

def _get_bootstrapped_stats(self, series_data, cases):
logger = self.logger
safe_log.logger(logger, "info", "Starting bootstrapping process.")
safe_log(logger, "info", "Starting bootstrapping process.")

safe_log.logger(logger, "debug", "Sorting series data.")
safe_log(logger, "debug", "Sorting series data.")
self.series_data = sort_data(series_data)
safe_log.logger(logger, "debug", f"Data sorted. Number of rows: {len(self.series_data)}")
safe_log(logger, "debug", f"Data sorted. Number of rows: {len(self.series_data)}")
if self.params['num_iterations'] == 1:
safe_log.logger(logger, "info", "Only one iteration specified. Skipping bootstrapping.")
safe_log(logger, "info", "Only one iteration specified. Skipping bootstrapping.")
stat_val = self._calc_stats(cases)[0]
safe_log.logger(logger, "debug", f"Statistic calculated: {stat_val}")
safe_log(logger, "debug", f"Statistic calculated: {stat_val}")
results = BootstrapResults(lower_bound=None,
value=stat_val,
upper_bound=None)
safe_log.logger(logger, "info", "Statistic calculated without bootstrapping.")
safe_log(logger, "info", "Statistic calculated without bootstrapping.")
else:
# need bootstrapping and CI calculation in addition to
safe_log.logger(logger, "info", "Performing bootstrapping and confidence interval calculation.")
safe_log(logger, "info", "Performing bootstrapping and confidence interval calculation.")
try:
results = bootstrap_and_value_mode(
self.series_data,
@@ -271,13 +271,13 @@ def _get_bootstrapped_stats(self, series_data, cases):
ci_method=self.params['method'],
logger=logger
)
safe_log.logger(logger, "debug", "Bootstrapping completed successfully.")
safe_log(logger, "debug", "Bootstrapping completed successfully.")
except KeyError as err:
safe_log.logger(logger, "error", f"Error during bootstrapping: {err}")
safe_log(logger, "error", f"Error during bootstrapping: {err}")
results = BootstrapResults(None, None, None)
safe_log.logger(logger, "info", "Returning empty BootstrapResults due to error.")
safe_log(logger, "info", "Returning empty BootstrapResults due to error.")
print(err)
safe_log.logger(logger, "info", "Bootstrapping process completed.")
safe_log(logger, "info", "Bootstrapping process completed.")
return results

@@ -294,23 +294,23 @@ def _calc_stats(self, cases):
"""
logger = self.logger
func_name = f'calculate_{self.statistic}'
safe_log.logger(logger, "info", f"Starting statistic calculation using function: {func_name}")
safe_log(logger, "info", f"Starting statistic calculation using function: {func_name}")
if cases is not None and cases.ndim == 2:
# The single value case
safe_log.logger(logger, "debug", "Processing single-value case.")
safe_log(logger, "debug", "Processing single-value case.")

# build a data frame with the sampled data
data_cases = np.asarray(self.series_data['case'])
flat_cases = cases.flatten()
values = self.series_data[np.in1d(data_cases, flat_cases)].to_numpy()
safe_log.logger(logger, "debug", f"Number of values selected for single case: {len(values)}")
safe_log(logger, "debug", f"Number of values selected for single case: {len(values)}")
# Calculate the statistic for each bootstrap iteration
try:
stat_value = globals()[func_name](values, self.column_names, logger=logger)
stat_values.append([stat_value])
safe_log.logger(logger, "info", f"Statistic calculated for bootstrap iteration: {stat_value}")
safe_log(logger, "info", f"Statistic calculated for bootstrap iteration: {stat_value}")
except Exception as e:
safe_log.logger(logger, "error", f"Error calculating statistic for bootstrap iteration: {e}")
safe_log(logger, "error", f"Error calculating statistic for bootstrap iteration: {e}")
raise

elif cases is not None and cases.ndim == 3:
@@ -319,17 +319,17 @@ def _calc_stats(self, cases):
for row in cases:
values_ind = self.series_data['case'].isin(row.flatten())
values = self.series_data[values_ind]
safe_log.logger(logger, "debug", f"Number of values selected for bootstrap iteration: {len(values)}")
safe_log(logger, "debug", f"Number of values selected for bootstrap iteration: {len(values)}")
# Calculate the statistic for each bootstrap iteration
try:
stat_value = globals()[func_name](values, self.column_names, logger=logger)
stat_values.append([stat_value])
safe_log.logger(logger, "info", f"Statistic calculated for bootstrap iteration: {stat_value}")
safe_log(logger, "info", f"Statistic calculated for bootstrap iteration: {stat_value}")
except Exception as e:
safe_log.logger(logger, "error", f"Error calculating statistic for bootstrap iteration: {e}")
safe_log(logger, "error", f"Error calculating statistic for bootstrap iteration: {e}")
raise
else:
safe_log.logger(logger, "error", "Invalid input for cases. Cannot calculate statistic.")
safe_log(logger, "error", "Invalid input for cases. Cannot calculate statistic.")
raise KeyError("can't calculate statistic")
return stat_values

@@ -338,46 +338,46 @@ def calculate_values(self):
Writes output data to the file
"""
logger = self.logger
safe_log.logger(logger, "info", "Starting calculation of values.")
safe_log(logger, "info", "Starting calculation of values.")
if not self.input_data.empty:
safe_log.logger(logger, "debug", "Input data is not empty. Proceeding with calculations.")
safe_log(logger, "debug", "Input data is not empty. Proceeding with calculations.")
if self.params['random_seed'] is not None and self.params['random_seed'] != 'None':
safe_log.logger(logger, "debug", f"Random seed set to: {self.params['random_seed']}")
safe_log(logger, "debug", f"Random seed set to: {self.params['random_seed']}")
np.random.seed(self.params['random_seed'])

# perform EE if needed
is_event_equal = parse_bool(self.params['event_equal'])
if is_event_equal:
safe_log.logger(logger, "info", "Event equalization required. Performing event equalization.")
safe_log(logger, "info", "Event equalization required. Performing event equalization.")
self._perform_event_equalization()
safe_log.logger(logger, "debug", "Event equalization completed.")
safe_log(logger, "debug", "Event equalization completed.")

# build the case information for each record
safe_log.logger(logger, "debug", "Building case information for each record.")
safe_log(logger, "debug", "Building case information for each record.")
fcst_valid = self.input_data.loc[:, 'fcst_valid'].astype(str)
indy_var = self.input_data.loc[:, self.params['indy_var']].astype(str)
self.input_data['case'] = fcst_valid + '#' + indy_var
safe_log.logger(logger, "debug", "Case information added to the input data.")
safe_log(logger, "debug", "Case information added to the input data.")

# get results for axis1
safe_log.logger(logger, "info", "Calculating results for axis 1.")
safe_log(logger, "info", "Calculating results for axis 1.")
out_frame = self._proceed_with_axis("1")
if self.params['series_val_2']:
safe_log.logger(logger, "info", "Series values for axis 2 detected. Calculating results for axis 2.")
safe_log(logger, "info", "Series values for axis 2 detected. Calculating results for axis 2.")
out_frame = pd.concat([out_frame, self._proceed_with_axis("2")])
safe_log.logger(logger, "debug", "Results for axis 2 calculated and combined with axis 1.")
safe_log(logger, "debug", "Results for axis 2 calculated and combined with axis 1.")

else:
safe_log.logger(logger, "warning", "Input data is empty. Returning an empty DataFrame.")
safe_log(logger, "warning", "Input data is empty. Returning an empty DataFrame.")
out_frame = pd.DataFrame()

header = True
mode = 'w'
safe_log.logger(logger, "info", f"Exporting results to {self.params['agg_stat_output']}")
safe_log(logger, "info", f"Exporting results to {self.params['agg_stat_output']}")
export_csv = out_frame.to_csv(self.params['agg_stat_output'],
index=None, header=header, mode=mode,
sep="\t", na_rep="NA")
safe_log.logger(logger, "info", "Results successfully exported to CSV.")
safe_log(logger, "info", "Results successfully exported to CSV.")


def _perform_event_equalization(self):
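
As a quick sanity check on why the old spelling breaks, a standalone reproduction (hypothetical; it uses the sketch near the top of this page, not the actual METcalcpy utility):

def safe_log(logger, log_method, message):
    # Stand-in for the helper sketched earlier on this page.
    if logger is not None:
        getattr(logger, log_method)(message)

try:
    # Pre-commit pattern: a plain function has no 'logger' attribute.
    safe_log.logger(None, "debug", "msg")
except AttributeError as err:
    print(err)  # 'function' object has no attribute 'logger'

# Post-commit pattern: a valid call that is a no-op when no logger is configured.
safe_log(None, "debug", "msg")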
