From ae20ef5ffadac1948275b31ba26509c0dc5b4781 Mon Sep 17 00:00:00 2001 From: Neuberger Date: Mon, 30 Oct 2023 11:21:01 +0100 Subject: [PATCH 001/191] initial commit --- src/pygama/cli.py | 4 ++-- src/pygama/evt/build_tcm.py | 2 +- src/pygama/flow/data_loader.py | 8 ++++---- src/pygama/flow/file_db.py | 26 +++++++++++------------- src/pygama/hit/build_hit.py | 2 +- src/pygama/pargen/AoE_cal.py | 2 +- src/pygama/pargen/cuts.py | 2 +- src/pygama/pargen/ecal_th.py | 2 +- src/pygama/pargen/energy_optimisation.py | 2 +- src/pygama/pargen/extract_tau.py | 2 +- tests/hit/test_build_hit.py | 2 +- 11 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/pygama/cli.py b/src/pygama/cli.py index a6b59abaf..fb05ef658 100644 --- a/src/pygama/cli.py +++ b/src/pygama/cli.py @@ -80,7 +80,7 @@ def pygama_cli(): def add_lh5ls_parser(subparsers): - """Configure :func:`.lgdo.lh5_store.show` command line interface.""" + """Configure :func:`.lgdo.lh5.show` command line interface.""" parser_lh5ls = subparsers.add_parser( "lh5ls", description="""Inspect LEGEND HDF5 (LH5) file contents""" @@ -99,7 +99,7 @@ def add_lh5ls_parser(subparsers): def lh5_show_cli(args): - """Passes command line arguments to :func:`.lgdo.lh5_store.show`.""" + """Passes command line arguments to :func:`.lgdo.lh5.show`.""" show(args.lh5_file, args.lh5_group, attrs=args.attributes) diff --git a/src/pygama/evt/build_tcm.py b/src/pygama/evt/build_tcm.py index 7bb0bbef3..be0f44ba5 100644 --- a/src/pygama/evt/build_tcm.py +++ b/src/pygama/evt/build_tcm.py @@ -49,7 +49,7 @@ def build_tcm( out_name name for the TCM table in the output file. wo_mode - mode to send to :meth:`~.lgdo.lh5_store.LH5Store.write_object`. + mode to send to :meth:`~.lgdo.lh5.LH5Store.write_object`. See Also -------- diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index 5acfa9d88..f0a0f6a2b 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -14,7 +14,9 @@ import numpy as np import pandas as pd from dspeed.vis import WaveformBrowser -from lgdo import Array, LH5Iterator, LH5Store, Struct, Table, lgdo_utils +from lgdo.lh5 import LH5Iterator, LH5Store +from lgdo.lh5.utils import expand_vars +from lgdo.types import Array, Struct, Table from lgdo.types.vectorofvectors import build_cl, explode_arrays, explode_cl from tqdm.auto import tqdm @@ -193,9 +195,7 @@ def set_config(self, config: dict | str) -> None: # look for info in configuration if FileDB is not set if self.filedb is None: # expand $_ variables - value = lgdo_utils.expand_vars( - config["filedb"], substitute={"_": config_dir} - ) + value = expand_vars(config["filedb"], substitute={"_": config_dir}) self.filedb = FileDB(value) if not os.path.isdir(self.filedb.data_dir): diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index c64e6b786..bf05a8f4f 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -9,11 +9,11 @@ import warnings import h5py -import lgdo import numpy as np import pandas as pd -from lgdo import Array, Scalar, VectorOfVectors -from lgdo import lh5_store as lh5 +from lgdo.lh5.store import LH5Store, ls +from lgdo.lh5.utils import expand_path, expand_vars +from lgdo.types import Array, Scalar, VectorOfVectors from parse import parse from . 
import utils @@ -185,14 +185,12 @@ def set_config(self, config: dict, config_path: str = None) -> None: if config_path is not None: subst_vars["_"] = os.path.dirname(str(config_path)) - data_dir = lgdo.lgdo_utils.expand_path( - self.config["data_dir"], substitute=subst_vars - ) + data_dir = expand_path(self.config["data_dir"], substitute=subst_vars) self.data_dir = data_dir tier_dirs = self.config["tier_dirs"] for k, val in tier_dirs.items(): - tier_dirs[k] = lgdo.lgdo_utils.expand_vars(val, substitute=subst_vars) + tier_dirs[k] = expand_vars(val, substitute=subst_vars) self.tier_dirs = tier_dirs def scan_files(self, dirs: list[str] = None) -> None: @@ -407,7 +405,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series: ) # TODO this call here is really expensive! - groups = lh5.ls(f, wildcard) + groups = ls(f, wildcard) if len(groups) > 0 and parse(template, groups[0]) is None: log.warning(f"groups in {fpath} don't match template") else: @@ -431,7 +429,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series: table_name = template try: - col = lh5.ls(f[table_name]) + col = ls(f[table_name]) except KeyError: log.warning(f"cannot find '{table_name}' in {fpath}") continue @@ -477,7 +475,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series: columns_vov = VectorOfVectors( flattened_data=flattened, cumulative_length=length ) - sto = lh5.LH5Store() + sto = LH5Store() sto.write_object(columns_vov, "unique_columns", to_file) return self.columns @@ -501,12 +499,12 @@ def from_disk(self, path: str | list[str]) -> None: # expand wildcards paths = [] for p in path: - paths += lgdo.lgdo_utils.expand_path(p, list=True) + paths += expand_path(p, list=True) if not paths: raise FileNotFoundError(path) - sto = lh5.LH5Store() + sto = LH5Store() # objects/accumulators that will be used to configure the FileDB at the end _cfg = None _df = None @@ -599,11 +597,11 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None: filename output LH5 file name. wo_mode - passed to :meth:`~.lgdo.lh5_store.write_object`. + passed to :meth:`~.lgdo.lh5.write_object`. """ log.debug(f"writing database to {filename}") - sto = lh5.LH5Store() + sto = LH5Store() sto.write_object( Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode ) diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index e531fa872..8c7771b64 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -69,7 +69,7 @@ def build_hit( n_max maximum number of rows to process wo_mode - forwarded to :meth:`~.lgdo.lh5_store.write_object`. + forwarded to :meth:`~.lgdo.lh5.write_object`. 
""" store = LH5Store() diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 114428d19..cc370f750 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -15,7 +15,7 @@ import matplotlib as mpl mpl.use("agg") -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import matplotlib.cm as cmx import matplotlib.colors as mcolors import matplotlib.dates as mdates diff --git a/src/pygama/pargen/cuts.py b/src/pygama/pargen/cuts.py index e6c6b571e..c4ab158d7 100644 --- a/src/pygama/pargen/cuts.py +++ b/src/pygama/pargen/cuts.py @@ -9,7 +9,7 @@ import logging import os -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import numpy as np import pandas as pd from scipy import stats diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index eadc7b0ef..ec386ffb7 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -15,7 +15,7 @@ from scipy.stats import binned_statistic mpl.use("agg") -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import matplotlib.pyplot as plt import numpy as np import pandas as pd diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 1354f74cf..b8ce58580 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,7 +13,7 @@ import sys from collections import namedtuple -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 2c096161e..50d81839d 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -15,7 +15,7 @@ mpl.use("agg") import lgdo -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 import matplotlib.pyplot as plt import numpy as np diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py index e497a1742..b2ebb3afd 100644 --- a/tests/hit/test_build_hit.py +++ b/tests/hit/test_build_hit.py @@ -1,7 +1,7 @@ import os from pathlib import Path -import lgdo.lh5_store as store +import lgdo.lh5 as store import numpy as np import pytest from lgdo import LH5Store, ls From 5328db6721179b4b05f6ec555e846d1a607b516b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:41:27 +0200 Subject: [PATCH 002/191] added more possible length combinations for getting parameters --- src/pygama/math/peak_fitting.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/pygama/math/peak_fitting.py b/src/pygama/math/peak_fitting.py index 9c60e2c8c..67e07409d 100644 --- a/src/pygama/math/peak_fitting.py +++ b/src/pygama/math/peak_fitting.py @@ -918,6 +918,8 @@ def get_mu_func(func, pars, cov = None, errors=None): n_sig, mu, sigma, n_bkg, hstep = pars elif len(pars) ==7: n_sig, mu, sigma, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==8: + n_sig, mu, sigma, n_bkg, hstep, low_range, high_range, components = pars if errors is not None: return mu, errors[1] elif cov is not None: @@ -930,6 +932,8 @@ def get_mu_func(func, pars, cov = None, errors=None): n_sig, mu, sigma, htail, tau, n_bkg, hstep = pars elif len(pars) ==9: n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==10: + n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range, components = pars if errors is not None: return mu, errors[1] elif cov is not None: @@ -948,6 +952,8 @@ def get_fwhm_func(func, pars, cov = None): n_sig, mu, sigma, n_bkg, hstep = 
pars
    elif len(pars) ==7:
        n_sig, mu, sigma, n_bkg, hstep, low_range, high_range = pars
+   elif len(pars) ==8:
+       n_sig, mu, sigma, n_bkg, hstep, low_range, high_range, components = pars
    if cov is None:
        return sigma*2*np.sqrt(2*np.log(2))
    else:
@@ -958,6 +964,12 @@ def get_fwhm_func(func, pars, cov = None):
        n_sig, mu, sigma, htail, tau, n_bkg, hstep = pars
    elif len(pars) ==9:
        n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range = pars
+       if cov is not None:
+           cov = cov[:7,:][:,:7]
+   elif len(pars) ==10:
+       n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range, components = pars
+       if cov is not None:
+           cov = cov[:7,:][:,:7]
        return radford_fwhm(sigma, htail, tau, cov)
    else:
@@ -971,6 +983,8 @@ def get_total_events_func(func, pars, cov = None, errors=None):
        n_sig, mu, sigma, n_bkg, hstep = pars
    elif len(pars) ==7:
        n_sig, mu, sigma, n_bkg, hstep, low_range, high_range = pars
+   elif len(pars) ==8:
+       n_sig, mu, sigma, n_bkg, hstep, low_range, high_range, components = pars
    if errors is not None:
        return n_sig+n_bkg, np.sqrt(errors[0]**2 + errors[3]**2)
    elif cov is not None:
@@ -983,6 +997,8 @@ def get_total_events_func(func, pars, cov = None, errors=None):
        n_sig, mu, sigma, htail, tau, n_bkg, hstep = pars
    elif len(pars) ==9:
        n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range = pars
+   elif len(pars) ==10:
+       n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range, components = pars
    if errors is not None:
        return n_sig+n_bkg, np.sqrt(errors[0]**2 + errors[5]**2)
    elif cov is not None:

From fc8b1299bd365b05f9f6a92a6130007bf7938e73 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Fri, 29 Sep 2023 17:42:50 +0200
Subject: [PATCH 003/191] updated for changes to calibrations

---
 src/pygama/pargen/energy_optimisation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py
index 1354f74cf..893a0a93f 100644
--- a/src/pygama/pargen/energy_optimisation.py
+++ b/src/pygama/pargen/energy_optimisation.py
@@ -1038,11 +1038,12 @@ def event_selection(
     e_upper_lim = peak_loc + (1.5 * kev_width[1]) / rough_adc_to_kev
     e_ranges = (int(peak_loc - e_lower_lim), int(e_upper_lim - peak_loc))
-    params, errors, covs, bins, ranges, p_val, valid_pks = pgc.hpge_fit_E_peaks(
+    params, errors, covs, bins, ranges, p_val, valid_pks, pk_funcs = pgc.hpge_fit_E_peaks(
         energy,
         [peak_loc],
         [e_ranges],
         n_bins=(np.nanmax(energy) - np.nanmin(energy)) // 1,
+        uncal_is_int=True
     )
     if params[0] is None:
         log.debug("Fit failed, using max guess")

From 56136ff2f0beee0447ca5a6571ebf794d3c11b5f Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Fri, 29 Sep 2023 17:44:28 +0200
Subject: [PATCH 004/191] rewrote fitting to fit in stages, dropping the tail
 when unnecessary and constraining it with a prior to remove the degeneracy;
 added checks so that no guess parameters are None; bounded mu to lie within
 the fit range; improved validity checks

---
 src/pygama/pargen/energy_cal.py | 294 +++++++++++++++++++++++---------
 1 file changed, 215 insertions(+), 79 deletions(-)

diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py
index 6f520505c..8a54f3bfd 100644
--- a/src/pygama/pargen/energy_cal.py
+++ b/src/pygama/pargen/energy_cal.py
@@ -18,6 +18,7 @@
 import pygama.math.histogram as pgh
 import pygama.math.peak_fitting as pgf
 import pygama.math.utils as pgu
+from pygama.pargen.utils import *
 
 log = logging.getLogger(__name__)
 
@@ -246,7 +247,7 @@ def hpge_fit_E_peak_tops(
     return np.array(pars_list, dtype=object), np.array(cov_list,
dtype=object)


-def get_hpge_E_peak_par_guess(hist, bins, var, func):
+def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess):
     """Get parameter guesses for func fit to peak in hist

     Parameters
@@ -263,7 +264,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func):
         or func == pgf.extended_gauss_step_pdf
     ):
         # get mu and height from a gauss fit, also sigma as fallback
-        pars, cov = pgf.gauss_mode_width_max(hist, bins, var)
+        pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10)
         bin_centres = pgh.get_bin_centers(bins)
         if pars is None:
             log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed")
@@ -316,7 +317,13 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func):
         n_bkg = np.sum(hist) - n_sig
         hstep = step / (bg + np.mean(hist[:10]))
-        return [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0]
+
+        parguess = [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0]
+        for i, guess in enumerate(parguess):
+            if np.isnan(guess):
+                parguess[i]=0
+
+        return parguess

     if (
         func == pgf.radford_cdf
@@ -324,7 +331,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func):
         or func == pgf.extended_radford_pdf
     ):
         # guess mu, height
-        pars, cov = pgf.gauss_mode_width_max(hist, bins, var)
+        pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10)
         bin_centres = pgh.get_bin_centers(bins)
         if pars is None:
             log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed")
@@ -386,6 +393,10 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func):

         parguess = [n_sig, mu, sigma, htail, tau, n_bkg, hstep, bins[0], bins[-1], 0]

+        for i, guess in enumerate(parguess):
+            if np.isnan(guess):
+                parguess[i]=0
+
         return parguess

     else:
@@ -422,7 +433,7 @@ def get_hpge_E_fixed(func):
     return None


-def get_hpge_E_bounds(func):
+def get_hpge_E_bounds(func, parguess):
     if (
         func == pgf.radford_cdf
         or func == pgf.radford_pdf
@@ -430,7 +441,7 @@ def get_hpge_E_bounds(func):
     ):
         return [
             (0, None),
-            (None, None),
+            (parguess[-3], parguess[-2]),
             (0, None),
             (0, 1),
             (None, None),
@@ -448,7 +459,7 @@ def get_hpge_E_bounds(func):
     ):
         return [
             (0, None),
-            (None, None),
+            (parguess[-3], parguess[-2]),
             (0, None),
             (0, None),
             (-1, 1),
@@ -461,6 +472,98 @@ def get_hpge_E_bounds(func):
         log.error(f"get_hpge_E_bounds not implemented for {func.__name__}")
         return []

+class tail_prior:
+    """
+    Prior penalty on the tail fraction ``htail``, pulling it towards zero to
+    remove the degeneracy between the low-energy tail and the Gaussian core.
+ """ + verbose=0 + errordef = Minuit.LIKELIHOOD # for Minuit to compute errors correctly + + def __init__(self, data, model): + self.model = model # model predicts y for given x + self.data=data + #self.x = np.asarray(x) + + def _call(self, *pars): + return self.__call__( *pars[0]) + + def __call__(self, n_sig, mu, sigma, htail, + tau, n_bkg, hstep, + lower_range ,upper_range, components): + return 100 * np.log(htail+0.1) #len(self.data)/ + +def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess): + par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i, mode_guess) + bounds = get_hpge_E_bounds(func_i, par_guesses) + fixed, mask = get_hpge_E_fixed(func_i) + + if func_i == pgf.extended_radford_pdf or func_i == pgf.radford_pdf: + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) +tail_prior(energies, func_i) + m = Minuit(cost_func, *par_guesses) + m.limits = bounds + for fix in fixed: + m.fixed[fix] = True + + m.values["htail"] = 0 + m.values["tau"] = 0 + m.fixed["htail"] = True + m.fixed["tau"] = True + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + try: + #set htail to guess + m.values["htail"] = par_guesses[3] + m.values["tau"] = par_guesses[4] + m.fixed = False + for fix in fixed: + m.fixed[fix] = True + + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + m.hesse() + pars_i = m.values + errs_i = m.errors + cov_i = m.covariance + valid_fit = m.valid + if valid_fit == False: + raise RuntimeError + except: + func_i = pgf.extended_gauss_step_pdf + gof_func_i = pgf.gauss_step_pdf + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, + func_i, gof_func_i, simplex, mode_guess) + + #check htail + if m.values["htail"]<0.01 or m.values["htail"]<2*m.errors["htail"] or np.isnan(m.values).any():# or + func_i = pgf.extended_gauss_step_pdf + gof_func_i = pgf.gauss_step_pdf + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, + func_i, gof_func_i, simplex, mode_guess) + + else: + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) + m = Minuit(cost_func, *par_guesses) + m.limits = bounds + for fix in fixed: + m.fixed[fix] = True + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + + m.hesse() + + pars_i = m.values + errs_i = m.errors + cov_i = m.covariance + + valid_fit = m.valid + + return pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit def hpge_fit_E_peaks( E_uncal, @@ -512,15 +615,16 @@ def hpge_fit_E_peaks( ranges: list of array a list of [Euc_min, Euc_max] used for each peak fit """ - pars = [] - covs = [] - binws = [] - ranges = [] - errors = [] - p_vals = [] - valid_pks = [] - - for i_peak in range(len(mode_guesses)): + pars = np.zeros(len(mode_guesses), dtype='object') + errors = np.zeros(len(mode_guesses), dtype='object') + covs = np.zeros(len(mode_guesses), dtype='object') + binws = np.zeros(len(mode_guesses)) + ranges = np.zeros(len(mode_guesses), dtype='object') + p_vals = np.zeros(len(mode_guesses)) + valid_pks = np.zeros(len(mode_guesses),dtype=bool) + out_funcs= np.zeros(len(mode_guesses), dtype='object') + + for i_peak, mode_guess in enumerate(mode_guesses): # get args for this peak wwidth_i = wwidths if not isinstance(wwidths, list) else wwidths[i_peak] n_bins_i = n_bins if np.isscalar(n_bins) else n_bins[i_peak] @@ -538,40 +642,51 @@ def hpge_fit_E_peaks( # bin a histogram Euc_min = mode_guesses[i_peak] - wleft_i Euc_max = mode_guesses[i_peak] + wright_i - Euc_min, Euc_max, n_bins_i = 
pgh.better_int_binning(
-            x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i
-        )
+        if uncal_is_int ==True:
+            Euc_min, Euc_max, n_bins_i = pgh.better_int_binning(
+                x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i
+            )

         if method == "unbinned":
             energies = E_uncal[(E_uncal > Euc_min) & (E_uncal < Euc_max)][:n_events]
             hist, bins, var = pgh.get_hist(
                 energies, bins=n_bins_i, range=(Euc_min, Euc_max)
             )
-            par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i)
-            bounds = get_hpge_E_bounds(func_i)
-            fixed, mask = get_hpge_E_fixed(func_i)
-
-            cost_func = cost.ExtendedUnbinnedNLL(energies, func_i)
-            m = Minuit(cost_func, *par_guesses)
-            m.limits = bounds
-            for fix in fixed:
-                m.fixed[fix] = True
-            if simplex == True:
-                m.simplex().migrad()
+            if func_i == pgf.extended_radford_pdf or func_i == pgf.extended_gauss_step_pdf:
+                pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var,
+                                                                                        func_i, gof_func_i, simplex, mode_guess)
             else:
-                m.migrad()
-            m.minos()
-
-            pars_i = m.values
-            errs_i = m.errors
-            cov_i = m.covariance
+
+                par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i)
+                bounds = get_hpge_E_bounds(func_i, par_guesses)
+                fixed, mask = get_hpge_E_fixed(func_i)
+
+                cost_func = cost.ExtendedUnbinnedNLL(energies, func_i)
+                m = Minuit(cost_func, *par_guesses)
+                m.limits = bounds
+                for fix in fixed:
+                    m.fixed[fix] = True
+                if simplex == True:
+                    m.simplex().migrad()
+                else:
+                    m.migrad()
+                m.hesse()
+
+                pars_i = m.values
+                errs_i = m.errors
+                cov_i = m.covariance
+                valid_fit = m.valid
+
+            csqr = pgf.goodness_of_fit(
+                hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=True
+            )

         else:
             hist, bins, var = pgh.get_hist(
                 E_uncal, bins=n_bins_i, range=(Euc_min, Euc_max)
             )
             par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i)
-            bounds = get_hpge_E_bounds(func_i)
+            bounds = get_hpge_E_bounds(func_i, par_guesses)
             fixed, mask = get_hpge_E_fixed(func_i)
             pars_i, errs_i, cov_i = pgf.fit_binned(
                 func_i,
@@ -585,72 +700,90 @@ def hpge_fit_E_peaks(
                 simplex=simplex,
                 bounds=bounds,
             )
+            valid_fit=True

-            csqr = pgf.goodness_of_fit(
-                hist, bins, None, gof_func_i, pars_i, method="Pearson"
-            )
-        p_val = scipy.stats.chi2.sf(csqr[0], csqr[1])
+            csqr = pgf.goodness_of_fit(
+                hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=False
+            )
+
+        if np.isnan(pars_i).any():
+            log.debug(
+                f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, par is nan : {pars_i}"
+            )
+            raise RuntimeError

-        pars_i = np.array(pars_i)[mask]
-        errs_i = np.array(errs_i)[mask]
-        cov_i = np.array(cov_i)[mask, :][:, mask]
+        p_val = scipy.stats.chi2.sf(csqr[0], csqr[1]+ len(np.where(mask)[0]))

-        valid_pks.append(True)
         total_events = pgf.get_total_events_func(func_i, pars_i, errors=errs_i)
         if (
-            sum(sum(c) if c is not None else 0 for c in cov_i) == np.inf
-            or sum(sum(c) if c is not None else 0 for c in cov_i) == 0
-            or np.isnan(sum(sum(c) if c is not None else 0 for c in cov_i))
+            sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == np.inf
+            or sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == 0
+            or np.isnan(sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]))
         ):
             log.debug(
                 f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}"
             )
-            valid_pks[-1] = False
+            valid_pks[i_peak] = False
             # pars_i, errs_i, cov_i, p_val = None, None, None, None

-        elif (np.abs(errs_i / pars_i) < 1e-7).any():
+        elif valid_fit == False:
             log.debug(
-                f"hpge_fit_E_peaks: cov estimation failed 
for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" + f"hpge_fit_E_peaks: peak fitting failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" ) - valid_pks[-1] = False + valid_pks[i_peak] = False + + elif ((np.abs(np.array(errs_i)[mask] / np.array(pars_i)[mask]) < 1e-7).any() + or np.isnan(np.array(errs_i)[mask]).any()): + log.debug( + f"hpge_fit_E_peaks: failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" + ) + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None elif np.abs(total_events[0] - np.sum(hist)) / np.sum(hist) > 0.1: log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, total_events is outside limit" ) - valid_pks[-1] = False + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None - elif p_val < allowed_p_val: + elif p_val < allowed_p_val or np.isnan(p_val): log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, p-value too low: {p_val}" ) - valid_pks[-1] = False + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None + else: + valid_pks[i_peak] = True except: - valid_pks.append(False) - pars_i, errs_i, cov_i, p_val = None, None, None, None + log.debug( + f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" + ) + valid_pks[i_peak] = False + pars_i, errs_i, cov_i = return_nans(func_i)#None, None, None, None + p_val = 0 # get binning binw_1 = (bins[-1] - bins[0]) / (len(bins) - 1) - pars.append(pars_i) - errors.append(errs_i) - covs.append(cov_i) - binws.append(binw_1) - ranges.append([Euc_min, Euc_max]) - p_vals.append(p_val) + pars[i_peak] = pars_i + errors[i_peak] = errs_i + covs[i_peak] = cov_i + binws[i_peak] =binw_1 + ranges[i_peak] =[Euc_min, Euc_max] + p_vals[i_peak] =p_val + out_funcs[i_peak] =func_i return ( - np.array(pars, dtype=object), - np.array(errors, dtype=object), - np.array(covs, dtype=object), - np.array(binws), - np.array(ranges), - np.array(p_vals), - np.array(valid_pks, dtype=bool), + pars, + errors, + covs, + binws, + ranges, + p_vals, + valid_pks, + out_funcs ) @@ -877,7 +1010,8 @@ def hpge_E_calibration( idx = [i for i, E in enumerate(peaks_keV) if E in got_peaks_keV] range_keV = [range_keV[i] for i in idx] funcs = [funcs[i] for i in idx] - + gof_funcs = [gof_funcs[i] for i in idx] + # Drop peaks to not be fitted tmp = zip( *[ @@ -910,13 +1044,13 @@ def hpge_E_calibration( derco = np.polyder(np.poly1d(roughpars)).coefficients der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV] range_uncal = [float(range_keV) / d for d in der] - n_bins = [range_keV / 0.5 / d for d in der] + n_bins = [int(range_keV / 0.5 / d) for d in der] elif isinstance(range_keV, tuple): rangeleft_keV, rangeright_keV = range_keV derco = np.polyder(np.poly1d(roughpars)).coefficients der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV] range_uncal = [(rangeleft_keV / d, rangeright_keV / d) for d in der] - n_bins = [sum(range_keV) / 0.5 / d for d in der] + n_bins = [int(sum(range_keV) / 0.5 / d) for d in der] elif isinstance(range_keV, list): derco = np.polyder(np.poly1d(roughpars)).coefficients der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV] @@ -925,7 +1059,7 @@ def hpge_E_calibration( for r, d in zip(range_keV, der) ] n_bins = [ - sum(r) / 0.5 / d if isinstance(r, tuple) else r / 0.2 / d + int(sum(r) / 0.5 / d) if isinstance(r, tuple) else int(r / 0.2 / d) for r, d in zip(range_keV, der) ] @@ -937,6 +1071,7 @@ def hpge_E_calibration( pk_ranges, pk_pvals, valid_pks, + pk_funcs 
) = hpge_fit_E_peaks( E_uncal, got_peaks_locs, @@ -957,6 +1092,7 @@ def hpge_E_calibration( results["pk_ranges"] = pk_ranges results["pk_pvals"] = pk_pvals results["pk_validities"] = valid_pks + results["pk_funcs"] = pk_funcs # Drop failed fits fitidx = [i == True for i in valid_pks] fitted_peaks_keV = results["fitted_keV"] = got_peaks_keV[fitidx] @@ -967,11 +1103,11 @@ def hpge_E_calibration( pk_binws = np.asarray(pk_binws)[fitidx] pk_ranges = np.asarray(pk_ranges)[fitidx] pk_pvals = np.asarray(pk_pvals)[fitidx] + pk_funcs = np.asarray(pk_funcs)[fitidx] log.info(f"{sum(fitidx)} peaks fitted:") - for i, (Ei, parsi, errorsi, covsi) in enumerate( - zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs) + for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( + zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs, pk_funcs) ): - func_i = funcs[i] if hasattr(funcs, "__len__") else funcs varnames = func_i.__code__.co_varnames[1 : len(pk_pars[-1]) + 1] parsi = np.asarray(parsi, dtype=float) errorsi = np.asarray(errorsi, dtype=float) @@ -988,7 +1124,7 @@ def hpge_E_calibration( # Do a second calibration to the results of the full peak fits mus = [ pgf.get_mu_func(func_i, pars_i, errors=errors_i) - for func_i, pars_i, errors_i in zip(funcs, pk_pars, pk_errors) + for func_i, pars_i, errors_i in zip(pk_funcs, pk_pars, pk_errors) ] mus, mu_vars = zip(*mus) mus = np.asarray(mus) @@ -1008,7 +1144,7 @@ def hpge_E_calibration( # Finally, calculate fwhms in keV uncal_fwhms = [ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) - for func_i, pars_i, covs_i in zip(funcs, pk_pars, pk_covs) + for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs) ] uncal_fwhms, uncal_fwhm_errs = zip(*uncal_fwhms) uncal_fwhms = np.asarray(uncal_fwhms) From 25ce6a17006facea4a494c5d0d7d4101dff05805 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:46:05 +0200 Subject: [PATCH 005/191] changes for new cal fitting, added high stats fitting for super calibrations, wrote resolution fitting to include both linear and quadratic fits, changed results output for clarity --- src/pygama/pargen/ecal_th.py | 1040 ++++++++++++++++++++++------------ 1 file changed, 675 insertions(+), 365 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index eadc7b0ef..578d44867 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -28,6 +28,7 @@ import pygama.math.peak_fitting as pgf import pygama.pargen.cuts as cts import pygama.pargen.energy_cal as cal +from pygama.pargen.utils import * log = logging.getLogger(__name__) @@ -42,63 +43,6 @@ def fwhm_slope(x: np.array, m0: float, m1: float, m2: float = None) -> np.array: return np.sqrt(m0 + m1 * x + m2 * x**2) -def load_data( - files: list[str], - lh5_path: str, - energy_params: list[str], - hit_dict: dict = {}, - cut_parameters: list[str] = ["bl_mean", "bl_std", "pz_std"], -) -> pd.DataFrame: - df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) - pulser_props = cts.find_pulser_properties(df, energy="trapTmax") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (df.trapTmax.values < entry[0] + entry[1]) & ( - df.trapTmax.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") - - else: - ids = np.ones(len(df), dtype=bool) - log.debug(f"no pulser found") - - sto = lh5.LH5Store() - table = sto.read_object(lh5_path, files)[0] - - if len(hit_dict.keys()) == 0: - 
out_df = df.copy()
-        for param in energy_params:
-            try:
-                out_df[param] = table[param].nda
-
-            except RuntimeError:
-                param = param.split("_")[0]
-                out_df[param] = table[param].nda
-
-    else:
-        out_df = table.eval(hit_dict).get_dataframe()
-        out_df = pd.concat([df, out_df], axis=1)
-    out_df["is_not_pulser"] = ids
-
-    cut_parameters = cts.get_keys(table, cut_parameters)
-
-    for param in energy_params:
-        if param not in out_df:
-            out_df[param] = table[param].nda
-    if cut_parameters is not None:
-        for param in cut_parameters:
-            if param not in df:
-                out_df[param] = table[param].nda
-    log.debug("Data Loaded")
-    return out_df
-
-
 def apply_cuts(
     data: pd.DataFrame,
     hit_dict,
@@ -112,13 +56,12 @@ def apply_cuts(
         )
         mask = cts.get_cut_indexes(data, cut_dict)

-        data["is_valid_cal"] = mask
+        data[final_cut_field] = mask
     else:
-        data["is_valid_cal"] = np.ones(len(data), dtype=bool)
-    data["is_usable"] = data["is_valid_cal"] & data["is_not_pulser"]
+        data[final_cut_field] = np.ones(len(data), dtype=bool)

-    events_pqc = len(data.query("is_usable"))
+    events_pqc = len(data.query(f"{final_cut_field}&is_not_pulser"))
     log.debug(f"{events_pqc} events valid for calibration")

     return data, hit_dict
@@ -146,6 +89,32 @@ def gen_pars_dict(pars, deg, energy_param):

     return out_dict

+class fwhm_linear:
+    def func(x,a,b):
+        return np.sqrt(a + b * x)
+
+    def string_func(input_param):
+        return f"(a+b*{input_param})**(0.5)"
+
+    def guess(xs, ys, y_errs):
+        return [np.nanmin(ys), 10**-3]
+
+    def bounds():
+        return [(0,None),(0,None)]
+
+class fwhm_quadratic:
+
+    def func(x, a, b, c):
+        return np.sqrt(a + b * x + c*x**2)
+
+    def string_func(input_param):
+        return f"(a+b*{input_param}+c*{input_param}**2)**(0.5)"
+
+    def guess(xs, ys, y_errs):
+        return [np.nanmin(ys), 10**-3, 10**-5]
+
+    def bounds():
+        return [(0,None),(0,None),(0,None)]

 class calibrate_parameter:
     glines = [
@@ -191,8 +160,8 @@ class calibrate_parameter:

     def __init__(
         self,
-        data,
         energy_param,
+        selection_string = "is_usable",
         plot_options: dict = None,
         guess_keV: float | None = None,
         threshold: int = 0,
@@ -200,9 +169,14 @@ def __init__(
         n_events: int = None,
         simplex: bool = True,
         deg: int = 1,
+        cal_energy_param:str = None
     ):
-        self.data = data
         self.energy_param = energy_param
+        if cal_energy_param is None:
+            self.cal_energy_param = f"{self.energy_param}_cal"
+        else:
+            self.cal_energy_param = cal_energy_param
+        self.selection_string = selection_string
         self.guess_keV = guess_keV
         self.threshold = threshold
         self.p_val = p_val
@@ -213,7 +187,6 @@ def __init__(

         self.output_dict = {}
         self.hit_dict = {}
-        self.plot_dict = {}

     def fit_energy_res(self):
         fitted_peaks = self.results["fitted_keV"]
@@ -235,6 +208,10 @@ def fit_energy_res(self):
                 log.info(f"Tl DEP found at index {i}")
                 indexes.append(i)
                 continue
+            elif peak == 511.0:
+                log.info(f"e annihilation found at index {i}")
+                indexes.append(i)
+                continue
             elif np.isnan(dfwhms[i]):
                 log.info(f"{peak} failed")
                 indexes.append(i)
@@ -244,49 +221,130 @@ def fit_energy_res(self):
         fit_fwhms = np.delete(fwhms, [indexes])
         fit_dfwhms = np.delete(dfwhms, [indexes])
         #####
-        param_guess = [2, 0.001]
-        param_bounds = (0, np.inf)
         for i, peak in enumerate(fwhm_peaks):
             log.info(
                 f"FWHM of {peak} keV peak is: {fit_fwhms[i]:1.2f} +- {fit_dfwhms[i]:1.2f} keV"
             )
         try:
-            self.fit_pars, self.fit_covs = curve_fit(
-                fwhm_slope,
-                fwhm_peaks,
-                fit_fwhms,
-                sigma=fit_dfwhms,
-                p0=param_guess,
-                bounds=param_bounds,
-                absolute_sigma=True,
+            if 2614.50 not in fwhm_peaks:
+                raise RuntimeError
+
+            c_lin = cost.LeastSquares(
                fwhm_peaks,
fit_fwhms, fit_dfwhms, fwhm_linear.func ) - rng = np.random.default_rng(1) - pars_b = rng.multivariate_normal(self.fit_pars, self.fit_covs, size=1000) - fits = np.array([fwhm_slope(fwhm_peaks, *par_b) for par_b in pars_b]) - qbb_vals = np.array([fwhm_slope(2039.0, *par_b) for par_b in pars_b]) - self.qbb_err = np.nanstd(qbb_vals) - predicted_fwhms = fwhm_slope(fwhm_peaks, *self.fit_pars) - self.fit_qbb = fwhm_slope(2039.0, *self.fit_pars) + c_lin.loss = "soft_l1" + m_lin = Minuit(c_lin, *fwhm_linear.guess(fwhm_peaks, fit_fwhms, fit_dfwhms)) + m_lin.limits = fwhm_linear.bounds() + m_lin.simplex() + m_lin.migrad() + m_lin.hesse() - if 2614.50 not in fwhm_peaks: - self.fit_qbb = np.nan - self.qbb_err = np.nan - log.info(f"FWHM curve fit: {self.fit_pars}") + rng = np.random.default_rng(1) + pars_b = rng.multivariate_normal(m_lin.values, + m_lin.covariance, size=1000) + fits = np.array([fwhm_linear.func(fwhm_peaks, *par_b) for par_b in pars_b]) + qbb_vals = np.array([fwhm_linear.func(2039.0, *par_b) for par_b in pars_b]) + qbb_err = np.nanstd(qbb_vals) + predicted_fwhms = fwhm_linear.func(fwhm_peaks, *m_lin.values) + fit_qbb = fwhm_linear.func(2039.0, *m_lin.values) + + p_val = scipy.stats.chi2.sf(m_lin.fval, len(fwhm_peaks)-len(m_lin.values)) + + self.fwhm_fit_linear = {"function":fwhm_linear.__name__, + "module":fwhm_linear.__module__, + "expression":fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)":qbb_err, + "pars":m_lin.values, + "errors":m_lin.errors, + "cov":m_lin.covariance, + "csqr": (m_lin.fval, len(fwhm_peaks)-len(m_lin.values)), + "p_val":p_val} + + + log.info(f'FWHM linear fit: {self.fwhm_fit_linear["pars"].to_dict()}') log.info(f"FWHM fit values:") - for peak in fwhm_peaks: - log.info( - f"Predicted FWHM of {peak} keV peak is: {fwhm_slope(peak, *self.fit_pars):.2f} keV" - ) + log.info(f"\t Energy | FWHM (keV) | Predicted (keV)") + for i, (peak, fwhm, fwhme) in enumerate( + zip(fwhm_peaks, fit_fwhms, fit_dfwhms) + ): + log.info( + f"\t{i}".ljust(4) + + str(peak).ljust(9) + + f"| {fwhm:.2f}+-{fwhme:.2f} ".ljust(5) + +f"| {fwhm_linear.func(peak, *self.fwhm_fit_linear['pars']):.2f}".ljust(5) + ) + log.info( - f"FWHM energy resolution at Qbb: {self.fit_qbb:1.2f} +- {self.qbb_err:1.2f} keV" + f"FWHM energy resolution at Qbb (linear fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" + ) + except RuntimeError: + log.error(f"FWHM linear fit failed for {self.energy_param}") + pars, errs, cov = return_nans(fwhm_linear.func) + self.fwhm_fit_linear = {"function":fwhm_linear.__name__, + "module":fwhm_linear.__module__, + "expression":fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)":np.nan, + "pars":pars, + "errors":errs, + "cov":cov, + "csqr":(np.nan, np.nan), + "p_val":0} + log.error("FWHM linear fit failed to converge") + try: + if 2614.50 not in fwhm_peaks: + raise RuntimeError + c_quad = cost.LeastSquares( + fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_quadratic.func ) + c_quad.loss = "soft_l1" + m_quad = Minuit(c_quad, *fwhm_quadratic.guess(fwhm_peaks, fit_fwhms, fit_dfwhms)) + m_quad.limits = fwhm_quadratic.bounds() + m_quad.simplex() + m_quad.migrad() + m_quad.hesse() + + rng = np.random.default_rng(1) + pars_b = rng.multivariate_normal(m_quad.values, + m_quad.covariance, size=1000) + fits = np.array([fwhm_quadratic.func(fwhm_peaks, *par_b) for par_b in pars_b]) + qbb_vals = np.array([fwhm_quadratic.func(2039.0, *par_b) for par_b in pars_b]) + qbb_err = np.nanstd(qbb_vals) + predicted_fwhms = fwhm_quadratic.func(fwhm_peaks, *m_quad.values) 
+ fit_qbb = fwhm_quadratic.func(2039.0, *m_quad.values) + + p_val = scipy.stats.chi2.sf(m_quad.fval, len(fwhm_peaks)-len(m_quad.values)) + + self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, + "module":fwhm_quadratic.__module__, + "expression":fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)":qbb_err, + "pars":m_quad.values, + "errors":m_quad.errors, + "cov":m_quad.covariance, + "csqr": (m_quad.fval, len(fwhm_peaks)-len(m_quad.values)), + "p_val":p_val + } + log.info(f'FWHM quadratic fit: {self.fwhm_fit_quadratic["pars"].to_dict()}') + log.info( + f"FWHM energy resolution at Qbb (quadratic fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" + ) except RuntimeError: - log.error(f"FWHM fit failed for {energy_param}") - self.fit_pars = np.array([np.nan, np.nan]) - self.fit_covs = np.array([[np.nan, np.nan], [np.nan, np.nan]]) - self.fit_qbb = np.nan - self.qbb_err = np.nan - log.error("FWHM fit failed to converge") + log.error(f"FWHM quadratic fit failed for {self.energy_param}") + pars, errs, cov = return_nans(fwhm_quadratic.func) + self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, + "module":fwhm_quadratic.__module__, + "expression":fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)":np.nan, + "pars":pars, + "errors":errs, + "cov":cov, + "csqr":(np.nan, np.nan), + "p_val":0} + log.error("FWHM quadratic fit failed to converge") def gen_pars_dict(self): if self.deg == 1: @@ -310,11 +368,66 @@ def gen_pars_dict(self): return out_dict - def calibrate_parameter(self): + def get_results_dict(self, data): + if np.isnan(self.pars).all(): + return {} + else: + fwhm_linear = self.fwhm_fit_linear.copy() + fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() + fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = self.fwhm_fit_quadratic.copy() + fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() + fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = {Ei:{"function":func_i.__name__, + "module":func_i.__module__, + "pars(uncal)":parsi.to_dict(), + "errs(uncal)":errorsi.to_dict(), + "p_val": pvali, + "fwhm (keV)": list(fwhmi)} + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip(self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs) + )} + + return { + "total_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624" + ) + ), + "total_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597" + ) + ), + "pass_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624&{self.selection_string}" + ) + ), + "pass_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597&{self.selection_string}" + ) + ), + "eres_linear": fwhm_linear, + "eres_quadratic":fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits":pk_dict + } + + def calibrate_parameter(self, data): kev_ranges = self.range_keV.copy() if self.guess_keV is None: self.guess_keV = 2620 / np.nanpercentile( - self.data.query(f"is_usable & {self.energy_param}>{self.threshold}")[ + data.query(f"{self.selection_string} & {self.energy_param}>{self.threshold}")[ self.energy_param ], 99, @@ -325,7 +438,7 @@ def 
calibrate_parameter(self): try: self.pars, self.cov, self.results = cal.hpge_E_calibration( - self.data.query("is_usable")[self.energy_param], + data.query(self.selection_string)[self.energy_param], self.glines, self.guess_keV, deg=self.deg, @@ -340,51 +453,49 @@ def calibrate_parameter(self): pk_pars = self.results["pk_pars"] found_peaks = self.results["got_peaks_locs"] fitted_peaks = self.results["fitted_keV"] + fitted_funcs = self.results["pk_funcs"] + if self.pars is None: + raise ValueError + + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak ==self.glines)[0][0] + self.funcs[idx] = fitted_funcs[i] + if fitted_funcs[i] == pgf.extended_radford_pdf: + self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf except: found_peaks = np.array([]) fitted_peaks = np.array([]) + fitted_funcs = np.array([]) + + if len(fitted_peaks) != len(self.glines) or self.gof_funcs[-1]==pgf.gauss_step_pdf: + if self.glines[-1] in fitted_peaks: + if fitted_funcs[-1] == pgf.extended_gauss_step_pdf: + self.funcs = [pgf.extended_gauss_step_pdf for entry in self.glines] + self.gof_funcs = [pgf.gauss_step_pdf for entry in self.glines] + + for i, peak in enumerate(self.glines): + if peak not in fitted_peaks: + kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) + for i, peak in enumerate(self.glines): + if peak not in fitted_peaks: + kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) + for i, peak in enumerate(fitted_peaks): + try: + if ( + self.results["pk_fwhms"][:, 1][i] + / self.results["pk_fwhms"][:, 0][i] + > 0.05 + ): + index = np.where(self.glines == peak)[0][0] + kev_ranges[i] = (kev_ranges[index][0] - 5, kev_ranges[index][1] - 5) + except: + pass - for i, peak in enumerate(self.glines): - if peak not in fitted_peaks: - kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) - for i, peak in enumerate(self.glines): - if peak not in fitted_peaks: - kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) - for i, peak in enumerate(fitted_peaks): - try: - if ( - self.results["pk_fwhms"][:, 1][i] - / self.results["pk_fwhms"][:, 0][i] - > 0.05 - ): - index = np.where(self.glines == peak)[0][0] - kev_ranges[i] = (kev_ranges[index][0] - 5, kev_ranges[index][1] - 5) - except: - pass - - try: - self.pars, self.cov, self.results = cal.hpge_E_calibration( - self.data.query("is_usable")[self.energy_param], - self.glines, - self.guess_keV, - deg=self.deg, - range_keV=kev_ranges, - funcs=self.funcs, - gof_funcs=self.gof_funcs, - n_events=self.n_events, - allowed_p_val=self.p_val, - simplex=self.simplex, - verbose=False, - ) - except: - self.pars = None - if self.pars is None: - log.error( - f"Calibration failed for {self.energy_param}, trying with 0 p_val" - ) try: self.pars, self.cov, self.results = cal.hpge_E_calibration( - self.data.query("is_usable")[self.energy_param], + data.query(self.selection_string)[self.energy_param], self.glines, self.guess_keV, deg=self.deg, @@ -392,147 +503,282 @@ def calibrate_parameter(self): funcs=self.funcs, gof_funcs=self.gof_funcs, n_events=self.n_events, - allowed_p_val=0, + allowed_p_val=self.p_val, simplex=self.simplex, verbose=False, ) + fitted_peaks = self.results["fitted_keV"] + fitted_funcs = self.results["pk_funcs"] + + log.debug("Calibrated found") + log.info(f"Calibration pars are {self.pars}") + + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak ==self.glines)[0][0] + self.funcs[idx] = fitted_funcs[i] + if fitted_funcs[i] == pgf.extended_radford_pdf: + 
self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf if self.pars is None: raise ValueError - - self.fit_energy_res() - self.data[f"{self.energy_param}_cal"] = pgf.poly( - self.data[self.energy_param], self.pars - ) - self.hit_dict[f"{self.energy_param}_cal"] = self.gen_pars_dict() - self.output_dict[f"{self.energy_param}_cal"] = { - "Qbb_fwhm": np.nan, - "Qbb_fwhm_err": np.nan, - "2.6_fwhm": np.nan, - "2.6_fwhm_err": np.nan, - "eres_pars": self.fit_pars.tolist(), - "fitted_peaks": np.nan, - "p_vals": np.nan, - "fwhms": np.nan, - "peak_fit_pars": np.nan, - "peak_fit_errs": np.nan, - "total_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624" - ) - ), - "total_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597" - ) - ), - "pass_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624&is_usable" - ) - ), - "pass_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597&is_usable" - ) - ), - } + except: - log.error( - f"Calibration failed completely for {self.energy_param} even with 0 p_val" - ) self.pars = np.full(self.deg + 1, np.nan) - - self.hit_dict[f"{self.energy_param}_cal"] = self.gen_pars_dict() - - self.output_dict[f"{self.energy_param}_cal"] = { - "Qbb_fwhm": np.nan, - "Qbb_fwhm_err": np.nan, - "2.6_fwhm": np.nan, - "2.6_fwhm_err": np.nan, - "eres_pars": [np.nan, np.nan], - "fitted_peaks": np.nan, - "fwhms": np.nan, - "peak_fit_pars": np.nan, - "peak_fit_errs": np.nan, - "p_vals": np.nan, - "total_fep": np.nan, - "total_dep": np.nan, - "pass_fep": np.nan, - "pass_dep": np.nan, - } - + self.results = None + + log.error( + f"Calibration failed completely for {self.energy_param}" + ) else: - log.debug("done") + log.debug("Calibrated found") log.info(f"Calibration pars are {self.pars}") + if ~np.isnan(self.pars).all(): + self.fit_energy_res() + self.hit_dict[self.cal_energy_param] = self.gen_pars_dict() + data[f"{self.energy_param}_cal"] = pgf.poly( + data[self.energy_param], self.pars + ) - self.data[f"{self.energy_param}_cal"] = pgf.poly( - self.data[self.energy_param], self.pars - ) + def fill_plot_dict(self, data, plot_dict={}): + for key, item in self.plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](self, data, **item["options"]) + else: + plot_dict[key] = item["function"](self, data) + return plot_dict - pk_rs_dict = { - peak: self.results["pk_pars"][self.results["pk_validities"]][i].tolist() - for i, peak in enumerate(self.results["fitted_keV"]) - } - pk_errs_dict = { - peak: self.results["pk_errors"][self.results["pk_validities"]][ - i - ].tolist() - for i, peak in enumerate(self.results["fitted_keV"]) - } - self.fit_energy_res() - self.hit_dict[f"{self.energy_param}_cal"] = self.gen_pars_dict() +class high_stats_fitting(calibrate_parameter): + glines = [ + 238.632, + 511, + 583.191, + 727.330, + 763, + 785, + 860.564, + 893, + 1079, + 1513, + 1592.53, + 1620.50, + 2103.53, + 2614.50, + 3125, + 3198, + 3474 + ] # gamma lines used for calibration + range_keV = [ + (10, 10), + (30,30), + (30, 30), + (30, 30), + (30, 15), + (15, 30), + (30, 25), + (25, 30), + (30, 30), + (30, 30), + (30, 20), + (20, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + ] # side bands width + funcs = [ + pgf.extended_gauss_step_pdf, #probably should be gauss on exp + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + 
pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + ] + gof_funcs = [ + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + ] - if self.results["fitted_keV"][-1] == 2614.50: - fep_fwhm = round(self.results["pk_fwhms"][-1, 0], 2) - fep_dwhm = round(self.results["pk_fwhms"][-1, 1], 2) - else: - fep_fwhm = np.nan - fep_dwhm = np.nan - - self.output_dict[f"{self.energy_param}_cal"] = { - "Qbb_fwhm": round(self.fit_qbb, 2), - "Qbb_fwhm_err": round(self.qbb_err, 2), - "2.6_fwhm": fep_fwhm, - "2.6_fwhm_err": fep_dwhm, - "eres_pars": self.fit_pars.tolist(), - "fitted_peaks": self.results["fitted_keV"].tolist(), - "fwhms": self.results["pk_fwhms"].tolist(), - "peak_fit_pars": pk_rs_dict, - "peak_fit_errs": pk_errs_dict, - "p_vals": self.results["pk_pvals"].tolist(), - "total_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624" - ) - ), - "total_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597" - ) - ), - "pass_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624&is_usable" - ) - ), - "pass_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597&is_usable" - ) - ), + def __init__(self, energy_param, selection_string, threshold, p_val, + plot_options={}, simplex=False): + self.energy_param = energy_param + self.cal_energy_param = energy_param + self.selection_string = selection_string + self.threshold = threshold + self.p_val = p_val + self.plot_options = plot_options + self.simplex = simplex + self.results = {} + self.plot_dict = {} + self.n_events=None + self.output_dict = {} + self.pars=[1,0] + + def get_results_dict(self, data): + if self.results: + fwhm_linear = self.fwhm_fit_linear.copy() + fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() + fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = self.fwhm_fit_quadratic.copy() + fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() + fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = {Ei:{"function":func_i.__name__, + "module":func_i.__module__, + "pars(cal)":parsi.to_dict(), + "errs(cal)":errorsi.to_dict(), + "p_val": pvali, + "fwhm (keV)": list(fwhmi)} + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip(self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs) + )} + + return { + "eres_linear": fwhm_linear, + "eres_quadratic":fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits":pk_dict } - log.info( - f"Results {self.energy_param}: {json.dumps(self.output_dict[f'{self.energy_param}_cal'], 
indent=2)}" + else: + return {} + + + def fit_peaks(self, data): + log.debug(f"Fitting {self.energy_param}") + try: + n_bins = [int((self.range_keV[i][1]+self.range_keV[i][0]) /0.2) for i in range(len(self.glines))] + pk_pars, pk_errors, pk_covs, pk_binws, pk_ranges, pk_pvals, valid_pks, pk_funcs = cal.hpge_fit_E_peaks( + data.query(self.selection_string)[self.energy_param], + self.glines, + self.range_keV, + n_bins=n_bins, + funcs=self.funcs, + method="unbinned", + gof_funcs=self.gof_funcs, + n_events=None, + allowed_p_val=self.p_val ) + for idx, peak in enumerate(self.glines): + #idx = np.where(peak ==self.glines)[0][0] + self.funcs[idx] = pk_funcs[idx] + if pk_funcs[idx] == pgf.extended_radford_pdf: + self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf + + self.results["got_peaks_keV"] = self.glines + self.results["pk_pars"] = pk_pars + self.results["pk_errors"] = pk_errors + self.results["pk_covs"] = pk_covs + self.results["pk_binws"] = pk_binws + self.results["pk_ranges"] = pk_ranges + self.results["pk_pvals"] = pk_pvals + + + for i, pk in enumerate(self.results["got_peaks_keV"]): + try: + if self.results["pk_pars"][i]["n_sig"]<10: + valid_pks[i] = False + elif 2*self.results["pk_errors"][i]["n_sig"]>self.results["pk_pars"][i]["n_sig"]: + valid_pks[i] = False + except: + pass + + self.results["pk_validities"] = valid_pks + + # Drop failed fits + fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[valid_pks] + pk_pars = np.asarray(pk_pars, dtype=object)[valid_pks] # ragged + pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] + pk_covs = np.asarray(pk_covs, dtype=object)[valid_pks] + pk_binws = np.asarray(pk_binws)[valid_pks] + pk_ranges = np.asarray(pk_ranges)[valid_pks] + pk_pvals = np.asarray(pk_pvals)[valid_pks] + pk_funcs = np.asarray(pk_funcs)[valid_pks] + + + + log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") + for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( + zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs, pk_funcs) + ): + varnames = func_i.__code__.co_varnames[1 : len(pk_pars[-1]) + 1] + parsi = np.asarray(parsi, dtype=float) + errorsi = np.asarray(errorsi, dtype=float) + covsi = np.asarray(covsi, dtype=float) + # parsigsi = np.sqrt(covsi.diagonal()) + log.info(f"\tEnergy: {str(Ei)}") + log.info(f"\t\tParameter | Value +/- Sigma ") + for vari, pari, errorsi in zip(varnames, parsi, errorsi): + log.info( + f'\t\t{str(vari).ljust(10)} | {("%4.2f" % pari).rjust(8)} +/- {("%4.2f" % errorsi).ljust(8)}' + ) - def fill_plot_dict(self): - for key, item in self.plot_options.items(): - if item["options"] is not None: - self.plot_dict[key] = item["function"](self, **item["options"]) - else: - self.plot_dict[key] = item["function"](self) + cal_fwhms = [ + pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) + for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs) + ] + + cal_fwhms, cal_fwhms_errs = zip(*cal_fwhms) + cal_fwhms = np.asarray(cal_fwhms) + cal_fwhms_errs = np.asarray(cal_fwhms_errs) + self.results["pk_fwhms"] = np.asarray( + [(u, e) for u, e in zip(cal_fwhms, cal_fwhms_errs)] + ) + + log.info(f"{len(cal_fwhms)} FWHMs found:") + log.info(f"\t Energy | FWHM ") + for i, (Ei, fwhm, fwhme) in enumerate( + zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs) + ): + log.info( + f"\t{i}".ljust(4) + + str(Ei).ljust(9) + + f"| {fwhm:.2f}+-{fwhme:.2f} keV".ljust(5) + ) + self.fit_energy_res() + log.debug(f"high stats fitting successful") + except: + self.results = {} + log.debug(f"high stats fitting failed") + 
def get_peak_labels( @@ -564,9 +810,11 @@ def get_peak_label(peak: float) -> str: return "Tl SEP" elif peak == 2614.5: return "Tl FEP" + else: + return "" -def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): +def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, binning_keV=5): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -580,6 +828,7 @@ def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): if peak in fitted_peaks: fitted_gof_funcs.append(ecal_class.gof_funcs[i]) + mus = [ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_pars) @@ -590,43 +839,45 @@ def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): der = [pgf.poly(5, derco) for Ei in fitted_peaks] for i, peak in enumerate(mus): range_adu = 5 / der[i] - # plt.subplot(math.ceil((len(mus)) / 2), 2, i + 1) - plt.subplot(n_rows, ncols, i + 1) - binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 1) - bin_cs = (binning[1:] + binning[:-1]) / 2 - energies = ecal_class.data.query( - f"{ecal_class.energy_param}>{pk_ranges[i][0]}&{ecal_class.energy_param}<{pk_ranges[i][1]}&is_usable" - )[ecal_class.energy_param] - energies = energies.iloc[: ecal_class.n_events] - - counts, bs, bars = plt.hist(energies, bins=binning, histtype="step") - if pk_pars[i] is not None: - fit_vals = fitted_gof_funcs[i](bin_cs, *pk_pars[i]) * np.diff(bs) - plt.plot(bin_cs, fit_vals) - plt.step( - bin_cs, - [ - (fval - count) / count if count != 0 else (fval - count) - for count, fval in zip(counts, fit_vals) - ], - ) + plt.subplot(nrows, ncols, i + 1) + try: + binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 0.1/ der[i]) + bin_cs = (binning[1:] + binning[:-1]) / 2 + energies = data.query( + f"{ecal_class.energy_param}>{pk_ranges[i][0]}&{ecal_class.energy_param}<{pk_ranges[i][1]}&{ecal_class.selection_string}" + )[ecal_class.energy_param] + energies = energies.iloc[: ecal_class.n_events] + + counts, bs, bars = plt.hist(energies, bins=binning, histtype="step") + if pk_pars[i] is not None: + fit_vals = fitted_gof_funcs[i](bin_cs, *pk_pars[i][:-1], 0) * np.diff(bs)[0] + plt.plot(bin_cs, fit_vals) + plt.step( + bin_cs, + [ + (fval - count) / count if count != 0 else (fval - count) + for count, fval in zip(counts, fit_vals) + ], + ) - plt.annotate( - get_peak_label(fitted_peaks[i]), (0.02, 0.9), xycoords="axes fraction" - ) - plt.annotate( - f"{fitted_peaks[i]:.1f} keV", (0.02, 0.8), xycoords="axes fraction" - ) - plt.annotate( - f"p-value : {p_vals[i]:.4f}", (0.02, 0.7), xycoords="axes fraction" - ) - plt.xlabel("Energy (keV)") - plt.ylabel("Counts") - plt.legend(loc="upper left", frameon=False) - plt.xlim([peak - range_adu, peak + range_adu]) - locs, labels = plt.xticks() - new_locs, new_labels = get_peak_labels(locs, ecal_class.pars) - plt.xticks(ticks=new_locs, labels=new_labels) + plt.annotate( + get_peak_label(fitted_peaks[i]), (0.02, 0.9), xycoords="axes fraction" + ) + plt.annotate( + f"{fitted_peaks[i]:.1f} keV", (0.02, 0.8), xycoords="axes fraction" + ) + plt.annotate( + f"p-value : {p_vals[i]:.4f}", (0.02, 0.7), xycoords="axes fraction" + ) + plt.xlabel("Energy (keV)") + plt.ylabel("Counts") + plt.legend(loc="upper left", frameon=False) + plt.xlim([peak - range_adu, peak + range_adu]) + locs, labels = plt.xticks() + new_locs, new_labels = get_peak_labels(locs, ecal_class.pars) + plt.xticks(ticks=new_locs, labels=new_labels) + except: + pass plt.tight_layout() plt.close() 
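For orientation before the time-map and resolution plots that follow: fit_energy_res above fits the linear model sqrt(a + b*E) and the quadratic model sqrt(a + b*E + c*E**2) to the per-peak FWHMs with a robust least-squares loss, then extrapolates both to Qbb at 2039 keV with an uncertainty taken from sampling the fit covariance. A self-contained sketch of the linear case, assuming only numpy and iminuit; the peak energies and FWHM values are invented for illustration:

    import numpy as np
    from iminuit import Minuit, cost

    def fwhm_linear(x, a, b):
        # sqrt(a + b*E): constant noise term plus statistical term
        return np.sqrt(a + b * x)

    # per-peak FWHMs and uncertainties in keV (illustrative numbers only)
    peaks = np.array([238.632, 583.191, 727.330, 860.564, 1620.50, 2614.50])
    fwhms = np.array([1.85, 2.10, 2.18, 2.30, 2.75, 3.30])
    dfwhms = np.array([0.10, 0.06, 0.08, 0.09, 0.10, 0.05])

    c = cost.LeastSquares(peaks, fwhms, dfwhms, fwhm_linear)
    c.loss = "soft_l1"  # robust loss, as set in fit_energy_res
    m = Minuit(c, a=np.nanmin(fwhms), b=1e-3)  # same guess as fwhm_linear.guess
    m.limits = [(0, None), (0, None)]
    m.simplex()
    m.migrad()
    m.hesse()

    # propagate the fit covariance to Qbb = 2039 keV by sampling parameter sets
    rng = np.random.default_rng(1)
    pars_b = rng.multivariate_normal(m.values, m.covariance, size=1000)
    qbb = fwhm_linear(2039.0, *m.values)
    qbb_err = np.nanstd([fwhm_linear(2039.0, *p) for p in pars_b])
    print(f"FWHM at Qbb: {qbb:.2f} +- {qbb_err:.2f} keV")

The quadratic case is identical apart from the extra c*E**2 term, a third guess entry and a third (0, None) bound.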
@@ -634,28 +885,28 @@ def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): def plot_2614_timemap( - ecal_class, figsize=[12, 8], fontsize=12, erange=[2580, 2630], dx=1, time_dx=180 + ecal_class, data, figsize=[12, 8], fontsize=12, erange=[2580, 2630], dx=1, time_dx=180 ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - selection = ecal_class.data.query( - f"{ecal_class.energy_param}_cal>2560&{ecal_class.energy_param}_cal<2660&is_usable" + selection = data.query( + f"{ecal_class.cal_energy_param}>2560&{ecal_class.cal_energy_param}<2660&{ecal_class.selection_string}" ) + fig = plt.figure() if len(selection) == 0: pass else: time_bins = np.arange( - (np.amin(ecal_class.data["timestamp"]) // time_dx) * time_dx, - ((np.amax(ecal_class.data["timestamp"]) // time_dx) + 2) * time_dx, + (np.amin(data["timestamp"]) // time_dx) * time_dx, + ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx, time_dx, ) - fig = plt.figure() plt.hist2d( selection["timestamp"], - selection[f"{ecal_class.energy_param}_cal"], + selection[ecal_class.cal_energy_param], bins=[time_bins, np.arange(erange[0], erange[1] + dx, dx)], norm=LogNorm(), ) @@ -676,31 +927,31 @@ def plot_2614_timemap( def plot_pulser_timemap( - ecal_class, figsize=[12, 8], fontsize=12, dx=0.2, time_dx=180, n_spread=3 + ecal_class, data, figsize=[12, 8], fontsize=12, dx=0.2, time_dx=180, n_spread=3 ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize time_bins = np.arange( - (np.amin(ecal_class.data["timestamp"]) // time_dx) * time_dx, - ((np.amax(ecal_class.data["timestamp"]) // time_dx) + 2) * time_dx, + (np.amin(data["timestamp"]) // time_dx) * time_dx, + ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx, time_dx, ) - selection = ecal_class.data.query(f"~is_not_pulser") + selection = data.query(f"~is_not_pulser") fig = plt.figure() if len(selection) == 0: pass else: - mean = np.nanpercentile(selection[f"{ecal_class.energy_param}_cal"], 50) + mean = np.nanpercentile(selection[ecal_class.cal_energy_param], 50) spread = mean - np.nanpercentile( - selection[f"{ecal_class.energy_param}_cal"], 10 + selection[ecal_class.cal_energy_param], 10 ) plt.hist2d( selection["timestamp"], - selection[f"{ecal_class.energy_param}_cal"], + selection[ecal_class.cal_energy_param], bins=[ time_bins, np.arange(mean - n_spread * spread, mean + n_spread * spread + dx, dx), @@ -722,11 +973,11 @@ def plot_pulser_timemap( return fig -def bin_pulser_stability(ecal_class, time_slice=180): - selection = ecal_class.data.query(f"~is_not_pulser") +def bin_pulser_stability(ecal_class, data, time_slice=180): + selection = data.query(f"~is_not_pulser") - utime_array = ecal_class.data["timestamp"] - select_energies = selection[f"{ecal_class.energy_param}_cal"].to_numpy() + utime_array = data["timestamp"] + select_energies = selection[ecal_class.cal_energy_param].to_numpy() time_bins = np.arange( (np.amin(utime_array) // time_slice) * time_slice, @@ -762,13 +1013,13 @@ def bin_pulser_stability(ecal_class, time_slice=180): return {"time": times_average, "energy": par_average, "spread": par_error} -def bin_stability(ecal_class, time_slice=180, energy_range=[2585, 2660]): - selection = ecal_class.data.query( - f"{ecal_class.energy_param}_cal>{energy_range[0]}&{ecal_class.energy_param}_cal<{energy_range[1]}&is_usable" +def bin_stability(ecal_class, data, time_slice=180, energy_range=[2585, 2660]): + selection = data.query( + 
f"{ecal_class.cal_energy_param}>{energy_range[0]}&{ecal_class.cal_energy_param}<{energy_range[1]}&{ecal_class.selection_string}" ) - utime_array = ecal_class.data["timestamp"] - select_energies = selection[f"{ecal_class.energy_param}_cal"].to_numpy() + utime_array = data["timestamp"] + select_energies = selection[ecal_class.cal_energy_param].to_numpy() time_bins = np.arange( (np.amin(utime_array) // time_slice) * time_slice, @@ -804,7 +1055,7 @@ def bin_stability(ecal_class, time_slice=180, energy_range=[2585, 2660]): return {"time": times_average, "energy": par_average, "spread": par_error} -def plot_cal_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): +def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 2700]): pk_pars = ecal_class.results["pk_pars"] fitted_peaks = ecal_class.results["got_peaks_keV"] pk_errs = ecal_class.results["pk_errors"] @@ -856,7 +1107,7 @@ def plot_cal_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): return fig -def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): +def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsize=12): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -881,6 +1132,10 @@ def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): log.info(f"{peak} failed") indexes.append(i) continue + elif peak == 511.0: + log.info(f"e annhilation found at index {i}") + indexes.append(i) + continue else: fwhm_peaks = np.append(fwhm_peaks, peak) fit_fwhms = np.delete(fwhms, [indexes]) @@ -889,32 +1144,37 @@ def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): fig, (ax1, ax2) = plt.subplots( 2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]} ) - ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", lw=0, c="b") + ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", lw=0, c="black") fwhm_slope_bins = np.arange(erange[0], erange[1], 10) qbb_line_vx = [2039.0, 2039.0] qbb_line_vy = [ - 0.9 * np.nanmin(fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars)), - ecal_class.fit_qbb, + 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), + np.nanmax([ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"],ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]]) ] qbb_line_hx = [erange[0], 2039.0] - qbb_line_hy = [ecal_class.fit_qbb, ecal_class.fit_qbb] ax1.plot( - fwhm_slope_bins, fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars), lw=1, c="g" + fwhm_slope_bins, fwhm_linear.func(fwhm_slope_bins, + *ecal_class.fwhm_fit_linear["pars"]), lw=1, c="g", + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV' ) - ax1.plot(qbb_line_hx, qbb_line_hy, lw=1, c="r") - ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r") ax1.plot( - np.nan, - np.nan, - "-", - color="none", - label=f"Qbb fwhm: {ecal_class.fit_qbb:1.2f} +- {ecal_class.qbb_err:1.2f} keV", + fwhm_slope_bins, fwhm_quadratic.func(fwhm_slope_bins, + *ecal_class.fwhm_fit_quadratic["pars"]), lw=1, c="b", + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV' ) + ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]], + lw=1, c="r", ls="--") + ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]], 
+ lw=1, c="r", ls="--") + ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--") + ax1.legend(loc="upper left", frameon=False) - if np.isnan(ecal_class.fit_pars).all(): + if np.isnan(ecal_class.fwhm_fit_linear["pars"]).all(): [ 0.9 * np.nanmin(fit_fwhms), 1.1 * np.nanmax(fit_fwhms), @@ -922,56 +1182,62 @@ def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): else: ax1.set_ylim( [ - 0.9 * np.nanmin(fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars)), - 1.1 * np.nanmax(fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars)), + 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), + 1.1 * np.nanmax(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), ] ) - ax1.set_xlim([200, 2700]) - ax1.grid() + ax1.set_xlim(erange) ax1.set_ylabel("FWHM energy resolution (keV)") ax2.plot( fwhm_peaks, - (fit_fwhms - fwhm_slope(fwhm_peaks, *ecal_class.fit_pars)) / fit_dfwhms, + (fit_fwhms - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["pars"])) / fit_dfwhms, + lw=0, + marker="x", + c="g", + ) + ax2.plot( + fwhm_peaks, + (fit_fwhms - fwhm_quadratic.func(fwhm_peaks, *ecal_class.fwhm_fit_quadratic["pars"])) / fit_dfwhms, lw=0, marker="x", c="b", ) + ax2.plot(erange,[0,0], color="black",lw=0.5) ax2.set_xlabel("Energy (keV)") ax2.set_ylabel("Normalised Residuals") - ax2.grid() plt.tight_layout() plt.close() return fig -def bin_spectrum(ecal_class, erange=[0, 3000], dx=2): +def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): bins = np.arange(erange[0], erange[1] + dx, dx) return { "bins": pgh.get_bin_centers(bins), "counts": np.histogram( - ecal_class.data.query("is_usable")[f"{ecal_class.energy_param}_cal"], bins + data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - ecal_class.data.query("~is_valid_cal&is_not_pulser")[ - f"{ecal_class.energy_param}_cal" + data.query("~is_valid_cal&is_not_pulser")[ + ecal_class.cal_energy_param ], bins, )[0], "pulser_counts": np.histogram( - ecal_class.data.query("~is_not_pulser")[f"{ecal_class.energy_param}_cal"], + data.query("~is_not_pulser")[ecal_class.cal_energy_param], bins, )[0], } -def bin_survival_fraction(ecal_class, erange=[0, 3000], dx=6): +def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): counts_pass, bins_pass, _ = pgh.get_hist( - ecal_class.data.query("is_usable")[f"{ecal_class.energy_param}_cal"], + data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) counts_fail, bins_fail, _ = pgh.get_hist( - ecal_class.data.query("~is_valid_cal&is_not_pulser")[ - f"{ecal_class.energy_param}_cal" + data.query("~is_valid_cal&is_not_pulser")[ + ecal_class.cal_energy_param ], bins=np.arange(erange[0], erange[1] + dx, dx), ) @@ -981,35 +1247,36 @@ def bin_survival_fraction(ecal_class, erange=[0, 3000], dx=6): def energy_cal_th( files: list[str], - energy_params: list[str], + energy_params: list[str] , + lh5_path: str = "dsp", hit_dict: dict = {}, cut_parameters: dict[str, int] = {"bl_mean": 4, "bl_std": 4, "pz_std": 4}, - lh5_path: str = "dsp", plot_options: dict = None, - guess_keV: float | None = None, threshold: int = 0, p_val: float = 0, n_events: int = None, final_cut_field: str = "is_valid_cal", simplex: bool = True, + guess_keV: float | None = None, deg: int = 1, -) -> tuple(dict, dict): +) -> tuple(dict, dict, dict, dict): + data = load_data( files, lh5_path, - energy_params, hit_dict, - cut_parameters=list(cut_parameters) if cut_parameters 
is not None else None, + params = energy_params + list(cut_parameters) + ["timestamp"] ) data, hit_dict = apply_cuts(data, hit_dict, cut_parameters, final_cut_field) - output_dict = {} + results_dict = {} plot_dict = {} + full_object_dict = {} for energy_param in energy_params: ecal = calibrate_parameter( - data, energy_param, + f"{final_cut_field}&is_not_pulser", plot_options, guess_keV, threshold, @@ -1018,12 +1285,55 @@ def energy_cal_th( simplex, deg, ) - ecal.calibrate_parameter() - output_dict.update(ecal.output_dict) + ecal.calibrate_parameter(data) + results_dict[ecal.cal_energy_param] = ecal.get_results_dict(data) hit_dict.update(ecal.hit_dict) + full_object_dict[ecal.cal_energy_param] = ecal if ~np.isnan(ecal.pars).all(): - ecal.fill_plot_dict() - plot_dict[energy_param] = ecal.plot_dict + plot_dict[ecal.cal_energy_param] = ecal.fill_plot_dict(data) + + + log.info(f"Finished all calibrations") + return hit_dict, results_dict, plot_dict, full_object_dict + + +def partition_energy_cal_th( + files: list[str], + energy_params: list[str], + lh5_path: str = "dsp", + hit_dict: dict = {}, + plot_options: dict = None, + threshold: int = 0, + p_val: float = 0, + n_events: int = None, + final_cut_field: str = "is_valid_cal", + simplex: bool = True, +) -> tuple(dict, dict, dict, dict): + + data = load_data( + files, + lh5_path, + hit_dict, + params = energy_params + [final_cut_field] + ["timestamp"] + ) + + results_dict = {} + plot_dict = {} + full_object_dict = {} + for energy_param in energy_params: + ecal = high_stats_fitting( + energy_param, + f"{final_cut_field}&is_not_pulser", + threshold, + p_val, + plot_options, + simplex, + ) + ecal.fit_peaks(data) + results_dict[energy_param] = ecal.get_results_dict(data) + full_object_dict[energy_param] = ecal + if ecal.results: + plot_dict[energy_param] = ecal.fill_plot_dict(data) log.info(f"Finished all calibrations") - return hit_dict, output_dict, plot_dict + return results_dict, plot_dict, full_object_dict \ No newline at end of file From 011838ac9f039a3275590e882a223c5e1f33fec0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:47:31 +0200 Subject: [PATCH 006/191] rewrite of aoe routines, better handling of guesses, improved clarity of results, split out plots into own functions --- src/pygama/pargen/AoE_cal.py | 3137 +++++++++++++++------------------- 1 file changed, 1395 insertions(+), 1742 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 114428d19..dc9f9ae3c 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -29,22 +29,12 @@ import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf -import pygama.pargen.cuts as cts -import pygama.pargen.ecal_th as thc -import pygama.pargen.energy_cal as pgc from pygama.math.peak_fitting import nb_erfc from pygama.pargen.energy_cal import get_i_local_maxima +from pygama.pargen.utils import * log = logging.getLogger(__name__) - -def return_nans(func): - args = func.__code__.co_varnames[: func.__code__.co_argcount][1:] - c = cost.UnbinnedNLL(np.array([0]), func) - m = Minuit(c, *[np.nan for arg in args]) - return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) - - class PDF: """ @@ -54,13 +44,7 @@ class PDF: def pdf(x): return - def return_nans(self): - args = self.pdf.__code__.co_varnames[: self.pdf.__code__.co_argcount][2:] - c = cost.UnbinnedNLL(np.array([0]), self.pdf) - m = Minuit(c, *[np.nan for arg in args]) - return m.values, m.errors, np.full((len(m.values), len(m.values)), 
np.nan) - - def _replace_values(self, dic, **kwargs): + def _replace_values(dic, **kwargs): for item, value in kwargs.items(): dic[item] = value return dic @@ -68,7 +52,6 @@ def _replace_values(self, dic, **kwargs): class standard_aoe(PDF): def pdf( - self, x: np.array, n_sig: float, mu: float, @@ -97,7 +80,6 @@ def pdf( return sig, bkg def extended_pdf( - self, x: np.array, n_sig: float, mu: float, @@ -112,7 +94,7 @@ def extended_pdf( Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ if components == True: - sig, bkg = self.pdf( + sig, bkg = standard_aoe.pdf( x, n_sig, mu, @@ -125,7 +107,7 @@ def extended_pdf( ) return n_sig + n_bkg, sig, bkg else: - return n_sig + n_bkg, self.pdf( + return n_sig + n_bkg, standard_aoe.pdf( x, n_sig, mu, @@ -137,7 +119,7 @@ def extended_pdf( components, ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] @@ -162,10 +144,13 @@ def guess(self, hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return standard_aoe._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_sig": (0, None), "mu": (None, None), @@ -179,10 +164,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_sig": False, "mu": False, @@ -195,19 +180,18 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in standard_aoe._replace_values(fixed_dict, **kwargs).items() ] - def width(self, pars, errs, cov): + def width(pars, errs, cov): return pars["sigma"], errs["sigma"] - def centroid(self, pars, errs, cov): + def centroid(pars, errs, cov): return pars["mu"], errs["mu"] class standard_aoe_with_high_tail(PDF): def pdf( - self, x: np.array, n_sig: float, mu: float, @@ -242,7 +226,6 @@ def pdf( return sig, bkg def extended_pdf( - self, x: np.array, n_sig: float, mu: float, @@ -259,7 +242,7 @@ def extended_pdf( Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ if components == True: - sig, bkg = self.pdf( + sig, bkg = standard_aoe_with_high_tail.pdf( x, n_sig, mu, @@ -274,7 +257,7 @@ def extended_pdf( ) return n_sig + n_bkg, sig, bkg else: - return n_sig + n_bkg, self.pdf( + return n_sig + n_bkg, standard_aoe_with_high_tail.pdf( x, n_sig, mu, @@ -288,7 +271,7 @@ def extended_pdf( components, ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] try: @@ -314,10 +297,13 @@ def guess(self, hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return standard_aoe_with_high_tail._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_sig": (0, None), "mu": (None, None), @@ -333,10 +319,10 @@ def bounds(self, guess, **kwargs): return [ bound - for 
field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_with_high_tail._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_sig": False, "mu": False, @@ -351,22 +337,21 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in standard_aoe_with_high_tail._replace_values(fixed_dict, **kwargs).items() ] - def width(self, pars, errs, cov): + def width(pars, errs, cov): fwhm, fwhm_err = pgf.radford_fwhm( pars[2], pars[3], np.abs(pars[4]), cov=cov[:7, :7] ) return fwhm / 2.355, fwhm_err / 2.355 - def centroid(self, pars, errs, cov): + def centroid(pars, errs, cov): return pars["mu"], errs["mu"] class standard_aoe_bkg(PDF): def pdf( - self, x: np.array, n_events: float, mu: float, @@ -388,7 +373,6 @@ def pdf( return sig def extended_pdf( - self, x: np.array, n_events: float, mu: float, @@ -400,11 +384,11 @@ def extended_pdf( """ Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ - return n_events, self.pdf( + return n_events, standard_aoe_bkg.pdf( x, n_events, mu, sigma, tau_bkg, lower_range, upper_range ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] @@ -427,10 +411,13 @@ def guess(self, hist, bins, var, **kwargs): "lower_range": np.nanmin(bins), "upper_range": np.nanmax(bins), } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return standard_aoe_bkg._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_events": (0, None), "mu": (None, None), @@ -442,10 +429,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_bkg._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_bkg": False, "mu": False, @@ -456,12 +443,12 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in standard_aoe_bkg._replace_values(fixed_dict, **kwargs).items() ] class gaussian(PDF): - def pdf(self, x: np.array, n_events: float, mu: float, sigma: float) -> np.array: + def pdf(x: np.array, n_events: float, mu: float, sigma: float) -> np.array: """ PDF for A/E consists of a gaussian signal with tail with gaussian tail background """ @@ -473,16 +460,16 @@ def pdf(self, x: np.array, n_events: float, mu: float, sigma: float) -> np.array return sig def extended_pdf( - self, x: np.array, n_events: float, mu: float, sigma: float + x: np.array, n_events: float, mu: float, sigma: float ) -> tuple(float, np.array): """ Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ - return n_events, self.pdf( - x, n_events, mu, sigma, tau_bkg, lower_range, upper_range + return n_events, gaussian.pdf( + x, n_events, mu, sigma ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] try: @@ -497,18 +484,21 @@ def guess(self, hist, bins, var, **kwargs): ) guess_dict = {"n_events": ls_guess, "mu": mu, "sigma": sigma} + for key, guess in 
guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return gaussian._replace_values(guess_dict, **kwargs) - def bounds(self, gpars, **kwargs): + def bounds(gpars, **kwargs): bounds_dict = {"n_events": (0, None), "mu": (None, None), "sigma": (0, None)} return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in gaussian._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_events": False, "mu": False, @@ -516,13 +506,12 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in gaussian._replace_values(fixed_dict, **kwargs).items() ] class drift_time_distribution(PDF): def pdf( - self, x, n_sig1, mu1, @@ -544,7 +533,6 @@ def pdf( return gauss1 + gauss2 def extended_pdf( - self, x, n_sig1, mu1, @@ -559,7 +547,7 @@ def extended_pdf( components, ): if components is True: - gauss1, gauss2 = self.pdf( + gauss1, gauss2 = drift_time_distribution.pdf( x, n_sig1, mu1, @@ -576,7 +564,7 @@ def extended_pdf( return n_sig1 + n_sig2, gauss1, gauss2 else: - return n_sig1 + n_sig2, self.pdf( + return n_sig1 + n_sig2, drift_time_distribution.pdf( x, n_sig1, mu1, @@ -591,7 +579,7 @@ def extended_pdf( components, ) - def guess(self, hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: + def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: """ Guess for fitting dt spectrum """ @@ -642,10 +630,13 @@ def guess(self, hist: np.array, bins: np.array, var: np.array, **kwargs) -> list "tau2": 0.1, "components": 0, } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return drift_time_distribution._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_sig1": (0, None), "mu1": (None, None), @@ -662,10 +653,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in drift_time_distribution._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_sig1": False, "mu1": False, @@ -681,93 +672,39 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in drift_time_distribution._replace_values(fixed_dict, **kwargs).items() ] - -def tag_pulser(files, lh5_path): - pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) - pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (pulser_df.trapTmax.values < entry[0] + entry[1]) & ( - pulser_df.trapTmax.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") - else: - ids = np.ones(len(pulser_df), dtype=bool) - log.debug(f"no pulser found") - return ids - - -def load_aoe( - files: list, - lh5_path: str, - cal_dict: dict, - params: [ - A_max, - tp_0_est, - tp_99, - dt_eff, - A_max_tri, - cuspEmax, - cuspEmax_ctc_cal, - is_valid_cal, - ], - energy_param: str, - current_param: str, -) -> tuple(np.array, np.array, np.array, np.array): - """ - Loads in 
the A/E parameters needed and applies calibration constants to energy - """ - - # switch this to dataframes, include timestamp - - sto = lh5.LH5Store() - - if isinstance(files, dict): - df = [] - all_files = [] - for tstamp, tfiles in files.items(): - table = sto.read_object(lh5_path, tfiles)[0] - if tstamp in cal_dict: - file_df = table.eval(cal_dict[tstamp]).get_dataframe() - else: - file_df = table.eval(cal_dict).get_dataframe() - file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) - params.append("timestamp") - df.append(file_df) - all_files += tfiles - - df = pd.concat(df) - - elif isinstance(files, list): - table = sto.read_object(lh5_path, files)[0] - df = table.eval(cal_dict).get_dataframe() - all_files = files - - ids = tag_pulser(all_files, lh5_path) - df["is_not_pulser"] = ids - params.append("is_not_pulser") - - for col in list(df.keys()): - if col not in params: - df.drop(col, inplace=True, axis=1) - - param_dict = {} - for param in params: - # add cuts in here - if param not in df: - df[param] = lh5.load_nda(all_files, [param], lh5_path)[param] - - df["AoE_uncorr"] = np.divide(df[current_param], df[energy_param]) - return df +class pol1: + + def func(x, a, b): + return x*a + b + + def string_func(input_param): + return f"{input_param}*a+b" + + def guess(bands, means, mean_errs): + return [-1e-06, 5e-01] + +class sigma_fit: + + def func(x, a, b, c): + return np.sqrt(a + (b / (x + 10**-99)) ** c) + + def string_func(input_param): + return f"(a+(b/({input_param}+10**-99))**c)**(0.5)" + + def guess(bands, sigmas, sigma_errs): + return [np.nanpercentile(sigmas, 50) ** 2, 2, 2] + +class sigmoid_fit: + + def func(x, a, b, c, d): + return (a + b * x) * nb_erfc(c * x + d) + + + def guess(xs, ys, y_errs): + return [np.nanmax(ys) / 2, 0, 1, 1.5] def unbinned_aoe_fit( @@ -779,13 +716,12 @@ def unbinned_aoe_fit( """ hist, bins, var = pgh.get_hist(aoe, bins=500) - gauss = gaussian() - gpars = gauss.guess(hist, bins, var) + gpars = gaussian.guess(hist, bins, var) c1_min = gpars["mu"] - 2 * gpars["sigma"] c1_max = gpars["mu"] + 3 * gpars["sigma"] # Initial fit just using Gaussian - c1 = cost.UnbinnedNLL(aoe[(aoe < c1_max) & (aoe > c1_min)], gauss.pdf) + c1 = cost.UnbinnedNLL(aoe[(aoe < c1_max) & (aoe > c1_min)], gaussian.pdf) m1 = Minuit(c1, **gpars) m1.limits = [ @@ -793,7 +729,7 @@ def unbinned_aoe_fit( (gpars["mu"] * 0.8, gpars["mu"] * 1.2), (0.8 * gpars["sigma"], gpars["sigma"] * 1.2), ] - m1.fixed = gauss.fixed() + m1.fixed = gaussian.fixed() m1.migrad() if verbose: @@ -808,8 +744,7 @@ def unbinned_aoe_fit( n_bkg_guess = len(aoe[(aoe < fmax) & (aoe > fmin)]) - m1.values["n_events"] - aoe_bkg = standard_aoe_bkg() - bkg_guess = aoe_bkg.guess( + bkg_guess = standard_aoe_bkg.guess( hist, bins, var, @@ -821,19 +756,18 @@ def unbinned_aoe_fit( ) c2 = cost.ExtendedUnbinnedNLL( - aoe[(aoe < fmax_bkg) & (aoe > fmin)], aoe_bkg.extended_pdf + aoe[(aoe < fmax_bkg) & (aoe > fmin)], standard_aoe_bkg.extended_pdf ) m2 = Minuit(c2, **bkg_guess) - m2.fixed = aoe_bkg.fixed(mu=True) - m2.limits = aoe_bkg.bounds( + m2.fixed = standard_aoe_bkg.fixed(mu=True) + m2.limits = standard_aoe_bkg.bounds( bkg_guess, n_events=(0, 2 * len(aoe[(aoe < fmax_bkg) & (aoe > fmin)])) ) m2.simplex().migrad() m2.hesse() - aoe_pdf = pdf() - x0 = aoe_pdf.guess( + x0 = pdf.guess( hist, bins, var, @@ -849,14 +783,14 @@ def unbinned_aoe_fit( print(x0) # Full fit using gaussian signal with gaussian tail background - c = cost.ExtendedUnbinnedNLL(aoe[(aoe < fmax) & (aoe > fmin)], aoe_pdf.extended_pdf) + c = 
cost.ExtendedUnbinnedNLL(aoe[(aoe < fmax) & (aoe > fmin)], pdf.extended_pdf) m = Minuit(c, **x0) - m.limits = aoe_pdf.bounds( + m.limits = pdf.bounds( x0, n_sig=(0, 2 * len(aoe[(aoe < fmax) & (aoe > fmin)])), n_bkg=(0, 2 * len(aoe[(aoe < fmax) & (aoe > fmin)])), ) - m.fixed = aoe_pdf.fixed() + m.fixed = pdf.fixed() m.migrad() m.hesse() @@ -868,7 +802,7 @@ def unbinned_aoe_fit( m.simplex.migrad() m.hesse() except: - return aoe_pdf.return_nans() + return return_nans(pdf) if display > 1: plt.figure() @@ -877,12 +811,12 @@ def unbinned_aoe_fit( aoe[(aoe < fmax) & (aoe > fmin)], bins=200, histtype="step", label="Data" ) dx = np.diff(bins) - plt.plot(xs, aoe_pdf.pdf(xs, *m.values) * dx[0], label="Full fit") - sig, bkg = aoe_pdf.pdf(xs, *m.values[:-1], True) + plt.plot(xs, pdf.pdf(xs, *m.values) * dx[0], label="Full fit") + sig, bkg = pdf.pdf(xs, *m.values[:-1], True) plt.plot(xs, sig * dx[0], label="Signal") plt.plot(xs, bkg * dx[0], label="Background") - plt.plot(xs, gauss.pdf(xs, *m1.values) * dx[0], label="Initial Gaussian") - plt.plot(xs, aoe_bkg.pdf(xs, *m2.values) * dx[0], label="Bkg guess") + plt.plot(xs, gaussian.pdf(xs, *m1.values) * dx[0], label="Initial Gaussian") + plt.plot(xs, standard_aoe_bkg.pdf(xs, *m2.values) * dx[0], label="Bkg guess") plt.xlabel("A/E") plt.ylabel("Counts") plt.legend(loc="upper left") @@ -890,7 +824,7 @@ def unbinned_aoe_fit( plt.figure() bin_centers = (bins[1:] + bins[:-1]) / 2 - res = (aoe_pdf.pdf(bin_centers, *m.values) * dx[0]) - counts + res = (pdf.pdf(bin_centers, *m.values) * dx[0]) - counts plt.plot( bin_centers, [re / count if count != 0 else re for re, count in zip(res, counts)], @@ -953,519 +887,13 @@ def fit_time_means(tstamps, means, reses): return out_dict -def aoe_timecorr( - df, energy_param, current_param, pdf=standard_aoe, plot_dict={}, display=0 -): - if "timestamp" in df: - tstamps = sorted(np.unique(df["timestamp"])) - if len(tstamps) > 1: - means = [] - errors = [] - reses = [] - res_errs = [] - final_tstamps = [] - for tstamp, time_df in df.groupby("timestamp", sort=True): - pars, errs, cov = unbinned_aoe_fit( - time_df.query( - f"is_usable_fits & cuspEmax_ctc_cal>1000 & cuspEmax_ctc_cal<1300" - )["AoE_uncorr"], - pdf=pdf, - display=display, - ) - final_tstamps.append(tstamp) - means.append(pars["mu"]) - errors.append(errs["mu"]) - reses.append(pars["sigma"] / pars["mu"]) - res_errs.append( - reses[-1] - * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"]) - ) - mean_dict = fit_time_means(tstamps, means, reses) - - df["AoE_timecorr"] = df["AoE_uncorr"] / np.array( - [mean_dict[tstamp] for tstamp in df["timestamp"]] - ) - out_dict = { - tstamp: { - "AoE_Timecorr": { - "expression": f"({current_param}/{energy_param})/a", - "parameters": {"a": mean_dict[tstamp]}, - } - } - for tstamp in mean_dict - } - res_dict = { - "times": tstamps, - "mean": means, - "mean_errs": errors, - "res": reses, - "res_errs": res_errs, - } - if display > 0: - fig1, ax = plt.subplots(1, 1) - ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in tstamps], - means, - yerr=errors, - linestyle=" ", - ) - ax.step( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in list(mean_dict) - ], - [mean_dict[tstamp] for tstamp in mean_dict], - where="post", - ) - ax.fill_between( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in list(mean_dict) - ], - y1=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - - 0.2 * np.array(reses), - y2=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - + 0.2 * 
np.array(reses), - color="green", - alpha=0.2, - ) - ax.fill_between( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in list(mean_dict) - ], - y1=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - - 0.4 * np.array(reses), - y2=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - + 0.4 * np.array(reses), - color="yellow", - alpha=0.2, - ) - ax.set_xlabel("time") - ax.set_ylabel("A/E mean") - myFmt = mdates.DateFormatter("%b %d") - ax.xaxis.set_major_formatter(myFmt) - plot_dict["aoe_time"] = fig1 - if display > 1: - plt.show() - else: - plt.close() - fig2, ax = plt.subplots(1, 1) - ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in tstamps], - reses, - yerr=res_errs, - linestyle=" ", - ) - ax.set_xlabel("time") - ax.set_ylabel("A/E res") - myFmt = mdates.DateFormatter("%b %d") - ax.xaxis.set_major_formatter(myFmt) - plot_dict["aoe_res"] = fig2 - if display > 1: - plt.show() - else: - plt.close() - return df, out_dict, res_dict, plot_dict - else: - return df, out_dict, res_dict - else: - pars, errs, cov = unbinned_aoe_fit( - df.query("is_usable_fits & cuspEmax_ctc_cal>1000 & cuspEmax_ctc_cal<1300")[ - "AoE_uncorr" - ] - ) - df["AoE_timecorr"] = df["AoE_uncorr"] / pars["mu"] - out_dict = { - "AoE_Timecorr": { - "expression": f"({current_param}/{energy_param})/a", - "parameters": {"a": pars["mu"]}, - } - } - res_err = (pars["sigma"] / pars["mu"]) * np.sqrt( - errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"] - ) - fit_result = { - "times": [np.nan], - "mean": [pars["mu"]], - "mean_errs": [errs["mu"]], - "res": [pars["sigma"] / pars["mu"]], - "res_errs": [res_err], - } - if display > 0: - return df, out_dict, fit_result, plot_dict - else: - return df, out_dict, fit_result - - -def pol1(x: np.array, a: float, b: float) -> np.array: - """Basic Polynomial for fitting A/E centroid against energy""" - return a * x + b - - -def sigma_fit(x: np.array, a: float, b: float, c: float) -> np.array: - """Function definition for fitting A/E sigma against energy""" - return np.sqrt(a + (b / (x + 10**-99)) ** c) - - -def AoEcorrection( - energy: np.array, - aoe: np.array, - eres: list, - pdf=standard_aoe, - plot_dict: dict = {}, - display: int = 0, - comptBands_width=20, - sigma_func=sigma_fit, -) -> tuple(np.array, np.array): - """ - Calculates the corrections needed for the energy dependence of the A/E. - Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. 
- """ - - comptBands = np.arange(900, 2350, comptBands_width) - peaks = np.array([1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105]) - allowed = np.array([], dtype=bool) - for i, band in enumerate(comptBands): - allow = True - for peak in peaks: - if (peak - 5) > band and (peak - 5) < (band + comptBands_width): - allow = False - elif (peak + 5 > band) and (peak + 5) < (band + comptBands_width): - allow = False - allowed = np.append(allowed, allow) - comptBands = comptBands[allowed] - - results_dict = {} - comptBands = comptBands[::-1] # Flip so color gets darker when plotting - compt_aoe = np.zeros(len(comptBands)) - aoe_sigmas = np.zeros(len(comptBands)) - compt_aoe_err = np.zeros(len(comptBands)) - aoe_sigmas_err = np.zeros(len(comptBands)) - ratio = np.zeros(len(comptBands)) - ratio_err = np.zeros(len(comptBands)) - - copper = cm = plt.get_cmap("copper") - cNorm = mcolors.Normalize(vmin=0, vmax=len(comptBands)) - scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=copper) - - if display > 0: - fits_fig = plt.figure() - - # Fit each compton band - for i, band in enumerate(comptBands): - aoe_tmp = aoe[ - (energy > band) & (energy < band + comptBands_width) & (aoe > 0) - ] # [:20000] - try: - aoe_pdf = pdf() - pars, errs, cov = unbinned_aoe_fit(aoe_tmp, pdf=pdf, display=display) - compt_aoe[i], compt_aoe_err[i] = aoe_pdf.centroid(pars, errs, cov) - aoe_sigmas[i], aoe_sigmas_err[i] = aoe_pdf.width(pars, errs, cov) - - ratio[i] = pars["n_sig"] / pars["n_bkg"] - ratio_err[i] = ratio[i] * np.sqrt( - (errs["n_sig"] / pars["n_sig"]) ** 2 - + (errs["n_bkg"] / pars["n_bkg"]) ** 2 - ) - - except: - compt_aoe[i] = np.nan - aoe_sigmas[i] = np.nan - compt_aoe_err[i] = np.nan - aoe_sigmas_err[i] = np.nan - ratio[i] = np.nan - ratio_err[i] = np.nan - - if display > 0: - if ( - np.isnan(errs["mu"]) - | np.isnan(errs["sigma"]) - | (errs["mu"] == 0) - | (errs["sigma"] == 0) - ): - pass - else: - xs = np.arange( - pars["mu"] - 4 * pars["sigma"], - pars["mu"] + 3 * pars["sigma"], - pars["sigma"] / 10, - ) - colorVal = scalarMap.to_rgba(i) - aoe_pdf = pdf() - plt.plot(xs, aoe_pdf.pdf(xs, *pars), color=colorVal) - - if display > 0: - plt.xlabel("A/E") - plt.ylabel("Expected Counts") - plt.title("Compton Band Fits") - cbar = plt.colorbar( - cmx.ScalarMappable(norm=cNorm, cmap=plt.get_cmap("copper_r")), - orientation="horizontal", - label="Compton Band Energy", - ticks=[0, 16, 32, len(comptBands)], - ) # cax=ax, - cbar.ax.set_xticklabels( - [ - comptBands[::-1][0], - comptBands[::-1][16], - comptBands[::-1][32], - comptBands[::-1][-1], - ] - ) - plot_dict["band_fits"] = fits_fig - if display > 1: - plt.show() - else: - plt.close() - - ids = ( - np.isnan(compt_aoe_err) - | np.isnan(aoe_sigmas_err) - | (aoe_sigmas_err == 0) - | (compt_aoe_err == 0) - ) - results_dict["n_of_valid_fits"] = len(np.where(~ids)[0]) - # Fit mus against energy - p0_mu = [-1e-06, 5e-01] - c_mu = cost.LeastSquares( - comptBands[~ids], compt_aoe[~ids], compt_aoe_err[~ids], pol1 - ) - c_mu.loss = "soft_l1" - m_mu = Minuit(c_mu, *p0_mu) - m_mu.simplex() - m_mu.migrad() - m_mu.hesse() - - pars = m_mu.values - errs = m_mu.errors - - csqr_mu = np.sum( - ((compt_aoe[~ids] - pol1(comptBands[~ids], *pars)) ** 2) / compt_aoe_err[~ids] - ) - dof_mu = len(compt_aoe[~ids]) - len(pars) - results_dict["p_val_mu"] = chi2.sf(csqr_mu, dof_mu) - results_dict["csqr_mu"] = (csqr_mu, dof_mu) - - # Fit sigma against energy - p0_sig = [np.nanpercentile(aoe_sigmas[~ids], 50) ** 2, 2, 2] - c_sig = cost.LeastSquares( - comptBands[~ids], aoe_sigmas[~ids], 
aoe_sigmas_err[~ids], sigma_func - ) - c_sig.loss = "soft_l1" - m_sig = Minuit(c_sig, *p0_sig) - m_sig.simplex() - m_sig.migrad() - m_sig.hesse() - - sig_pars = m_sig.values - sig_errs = m_sig.errors - - csqr_sig = np.sum( - ((aoe_sigmas[~ids] - sigma_func(comptBands[~ids], *sig_pars)) ** 2) - / aoe_sigmas_err[~ids] - ) - dof_sig = len(aoe_sigmas[~ids]) - len(sig_pars) - results_dict["p_val_sig"] = chi2.sf(csqr_sig, dof_sig) - results_dict["csqr_sig"] = (csqr_sig, dof_sig) - - model = pol1(comptBands, *pars) - sig_model = sigma_func(comptBands, *sig_pars) - - # Get DEP fit - sigma = np.sqrt(eres[0] + 1592 * eres[1]) / 2.355 - n_sigma = 4 - peak = 1592 - emin = peak - n_sigma * sigma - emax = peak + n_sigma * sigma - try: - dep_pars, dep_err, dep_cov = unbinned_aoe_fit( - aoe[(energy > emin) & (energy < emax) & (aoe > 0)], pdf=pdf - ) - except: - dep_pars, dep_err, dep_cov = return_nans(pdf) - - if display > 0: - mean_fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) - ax1.errorbar( - comptBands[~ids] + 10, - compt_aoe[~ids], - yerr=compt_aoe_err[~ids], - xerr=10, - label="data", - linestyle=" ", - ) - ax1.plot(comptBands[~ids] + 10, model[~ids], label="linear model") - ax1.errorbar( - 1592, - dep_pars["mu"], - xerr=n_sigma * sigma, - yerr=dep_err["mu"], - label="DEP", - color="green", - linestyle=" ", - ) - - ax1.legend(title="A/E mu energy dependence", frameon=False) - - ax1.set_ylabel("raw A/E (a.u.)", ha="right", y=1) - ax2.scatter( - comptBands[~ids] + 10, - 100 * (compt_aoe[~ids] - model[~ids]) / model[~ids], - lw=1, - c="b", - ) - ax2.scatter( - 1592, - 100 * (dep_pars["mu"] - pol1(1592, *pars)) / pol1(1592, *pars), - lw=1, - c="g", - ) - ax2.set_ylabel("Residuals %", ha="right", y=1) - ax2.set_xlabel("Energy (keV)", ha="right", x=1) - plt.tight_layout() - plot_dict["mean_fit"] = mean_fig - if display > 1: - plt.show() - else: - plt.close() - - sig_fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) - ax1.errorbar( - comptBands[~ids] + 10, - aoe_sigmas[~ids], - yerr=aoe_sigmas_err[~ids], - xerr=10, - label="data", - linestyle=" ", - ) - if sigma_func == sigma_fit: - label = f"sqrt model: \nsqrt({sig_pars[0]:1.4f}+({sig_pars[1]:1.1f}/E)^{sig_pars[2]:1.1f})" - elif sigma_func == sigma_fit_quadratic: - label = f"quad model: \n({sig_pars[0]:1.4f}+({sig_pars[1]:1.6f}*E)+\n({sig_pars[2]:1.6f}*E)^2)" - else: - raise ValueError("unknown sigma function") - ax1.plot( - comptBands[~ids], - sig_model[~ids], - label=label, - ) - ax1.errorbar( - 1592, - dep_pars["sigma"], - xerr=n_sigma * sigma, - yerr=dep_err["sigma"], - label="DEP", - color="green", - ) - ax1.set_ylabel("A/E stdev (a.u.)", ha="right", y=1) - ax1.legend(title="A/E stdev energy dependence", frameon=False) - ax2.scatter( - comptBands[~ids] + 10, - 100 * (aoe_sigmas[~ids] - sig_model[~ids]) / sig_model[~ids], - lw=1, - c="b", - ) - ax2.scatter( - 1592, - 100 - * (dep_pars["sigma"] - sigma_func(1592, *sig_pars)) - / sigma_func(1592, *sig_pars), - lw=1, - c="g", - ) - ax2.set_ylabel("Residuals", ha="right", y=1) - ax2.set_xlabel("Energy (keV)", ha="right", x=1) - plt.tight_layout() - plot_dict["sigma_fit"] = sig_fig - if display > 1: - plt.show() - else: - plt.close() - return pars, sig_pars, results_dict, dep_pars, plot_dict - else: - return pars, sig_pars, results_dict, dep_pars - - -def plot_compt_bands_overlayed( - aoe: np.array, - energy: np.array, - eranges: list[tuple], - aoe_range: list[float] = None, - density=True, -) -> None: - """ - Function to plot various compton bands to check energy dependence and corrections - """ - - 
for erange in eranges: - range_idxs = (energy > erange - 10) & (energy < erange + 10) - hist, bins, var = pgh.get_hist( - aoe[range_idxs][ - (~np.isnan(aoe[range_idxs])) - & (aoe[range_idxs] > np.nanpercentile(aoe[range_idxs], 1)) - & (aoe[range_idxs] < np.nanpercentile(aoe[range_idxs], 99)) - ], - bins=100, - ) - bin_cs = (bins[1:] + bins[:-1]) / 2 - mu = bin_cs[np.argmax(hist)] - if aoe_range is not None: - idxs = ( - (energy > erange - 10) - & (energy < erange + 10) - & (aoe > aoe_range[0]) - & (aoe < aoe_range[1]) - & (~np.isnan(aoe)) - ) - bins = np.linspace(aoe_range[0], aoe_range[1], 50) - else: - idxs = (energy > erange - 10) & (energy < erange + 10) & (~np.isnan(aoe)) - bins = np.linspace(0.85, 1.05, 200) - plt.hist( - aoe[idxs], - bins=bins, - histtype="step", - label=f"{erange-10}-{erange+10}", - density=density, - ) - - -def plot_dt_dep( - aoe: np.array, energy: np.array, dt: np.array, erange: list[tuple], title: str -) -> None: - """ - Function to produce 2d histograms of A/E against drift time to check dependencies - """ - - hist, bins, var = pgh.get_hist( - aoe[(energy > erange[0]) & (energy < erange[1]) & (~np.isnan(aoe))], bins=500 - ) - bin_cs = (bins[1:] + bins[:-1]) / 2 - mu = bin_cs[np.argmax(hist)] - aoe_range = [mu * 0.9, mu * 1.1] - - idxs = ( - (energy > erange[0]) - & (energy < erange[1]) - & (aoe > aoe_range[0]) - & (aoe < aoe_range[1]) - & (dt < 2000) - ) - - plt.hist2d(aoe[idxs], dt[idxs], bins=[200, 100], norm=LogNorm()) - plt.ylabel("Drift Time (ns)") - plt.xlabel("A/E") - plt.title(title) - - -def energy_guess(hist, bins, var, func_i, peak, eres_pars, fit_range): +def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): """ Simple guess for peak fitting """ if func_i == pgf.extended_radford_pdf: bin_cs = (bins[1:] + bins[:-1]) / 2 - sigma = thc.fwhm_slope(peak, *eres_pars) / 2.355 + sigma = eres / 2.355 i_0 = np.nanargmax(hist) mu = peak height = hist[i_0] @@ -1496,12 +924,15 @@ def energy_guess(hist, bins, var, func_i, peak, eres_pars, fit_range): fit_range[0], fit_range[1], 0, - ] # + ] + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i]=0 return parguess elif func_i == pgf.extended_gauss_step_pdf: mu = peak - sigma = thc.fwhm_slope(peak, *eres_pars) / 2.355 + sigma = eres / 2.355 i_0 = np.argmax(hist) bg = np.mean(hist[-10:]) step = bg - np.mean(hist[:10]) @@ -1514,13 +945,18 @@ def energy_guess(hist, bins, var, func_i, peak, eres_pars, fit_range): nbkg_guess = 0 if nsig_guess < 0: nsig_guess = 0 - return [nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0] + + parguess=[nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0] + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i]=0 + return parguess def unbinned_energy_fit( energy: np.array, peak: float, - eres_pars: list = None, + eres: list, simplex=False, guess=None, display=0, @@ -1536,7 +972,7 @@ def unbinned_energy_fit( except ValueError: pars, errs, cov = return_nans(pgf.radford_pdf) return pars, errs - sigma = thc.fwhm_slope(peak, *eres_pars) / 2.355 + sigma = eres / 2.355 if guess is None: x0 = energy_guess( hist, @@ -1544,7 +980,7 @@ def unbinned_energy_fit( var, pgf.extended_gauss_step_pdf, peak, - eres_pars, + eres, (np.nanmin(energy), np.nanmax(energy)), ) c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_gauss_step_pdf) @@ -1588,7 +1024,7 @@ def unbinned_energy_fit( var, pgf.extended_radford_pdf, peak, - eres_pars, + eres, (np.nanmin(energy), np.nanmax(energy)), ) x0[0] = x1[0] @@ -1661,7 +1097,7 @@ def 
get_peak_label(peak: float) -> str: def get_survival_fraction( energy, - aoe, + cut_param, cut_val, peak, eres_pars, @@ -1669,17 +1105,23 @@ def get_survival_fraction( guess_pars_cut=None, guess_pars_surv=None, dt_mask=None, + mode= "greater", display=0, ): if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) + dt_mask = np.full(len(cut_param), True, dtype=bool) - nan_idxs = np.isnan(aoe) + nan_idxs = np.isnan(cut_param) if high_cut is not None: - idxs = (aoe > cut_val) & (aoe < high_cut) & dt_mask + idxs = (cut_param > cut_val) & (cut_param < high_cut) & dt_mask else: - idxs = (aoe > cut_val) & dt_mask - + if mode == "greater": + idxs = (cut_param > cut_val) & dt_mask + elif mode == "less": + idxs = (cut_param < cut_val) & dt_mask + else: + raise ValueError("mode not recognised") + if guess_pars_cut is None or guess_pars_surv is None: pars, errs = unbinned_energy_fit(energy, peak, eres_pars, simplex=True) guess_pars_cut = pars @@ -1717,1167 +1159,1378 @@ def get_survival_fraction( return sf, err, cut_pars, surv_pars -def get_aoe_cut_fit( - energy: np.array, - aoe: np.array, - peak: float, - ranges: tuple(int, int), - dep_acc: float, - eres_pars: list, - display: int = 1, - dep_correct: bool = False, - dep_mu: Callable = None, - sig_func: Callable = None, - plot_dict={}, -) -> float: - """ - Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. - Then interpolates to get cut value at desired DEP survival fraction (typically 90%) - """ - - min_range, max_range = ranges - - peak_energy = energy[ - (energy > peak - min_range) & (energy < peak + max_range) - ] # [:20000] - peak_aoe = aoe[ - (energy > peak - min_range) & (energy < peak + max_range) - ] # [:20000] - - if dep_correct is True: - peak_aoe = (peak_aoe / dep_mu(peak_energy)) - 1 - peak_aoe = peak_aoe / sig_func(peak_energy) - - cut_vals = np.arange(-8, 0, 0.2) - sfs = [] - sf_errs = [] - for cut_val in cut_vals: - sf, err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, - peak_aoe, - cut_val, - peak, - eres_pars, - guess_pars_cut=None, - guess_pars_surv=None, - ) - sfs.append(sf) - sf_errs.append(err) - - # return cut_vals, sfs, sf_errs - ids = (sf_errs < (1.5 * np.nanpercentile(sf_errs, 85))) & (~np.isnan(sf_errs)) - - def fit_func(x, a, b, c, d): - return (a + b * x) * nb_erfc(c * x + d) - - c = cost.LeastSquares( - cut_vals[ids], np.array(sfs)[ids], np.array(sf_errs)[ids], fit_func - ) - c.loss = "soft_l1" - m1 = Minuit(c, np.nanmax(sfs) / 2, 0, 1, 1.5) - m1.simplex().migrad() - xs = np.arange(np.nanmin(cut_vals[ids]), np.nanmax(cut_vals[ids]), 0.01) - p = fit_func(xs, *m1.values) - cut_val = round(xs[np.argmin(np.abs(p - (100 * 0.9)))], 3) - - if display > 0: - fig = plt.figure() - plt.errorbar( - cut_vals[ids], - np.array(sfs)[ids], - yerr=np.array(sf_errs)[ids], - linestyle=" ", - ) - - plt.plot(xs, p) - plt.hlines((100 * dep_acc), -8.1, cut_val, color="red", linestyle="--") - plt.vlines( - cut_val, - np.nanmin(np.array(sfs)[ids]) * 0.9, - (100 * dep_acc), - color="red", - linestyle="--", - ) - plt.xlabel("cut value") - plt.ylabel("survival percentage") - plt.xlim([-8.1, 0.1]) - plt.ylim([np.nanmin(np.array(sfs)[ids]) * 0.9, 102]) - plot_dict["cut_determination"] = fig - if display > 1: - plt.show() - else: - plt.close() - return cut_val, plot_dict - else: - return cut_val - - -def get_sf( +def get_sf_sweep( energy: np.array, - aoe: np.array, + cut_param: np.array, + final_cut_value: float, peak: float, - fit_width: tuple(int, int), - 
aoe_cut_val: float, eres_pars: list, - dt_mask: np.array = None, - display: int = 0, -) -> tuple(np.array, np.array, np.array, float, float): + dt_mask = None, + cut_range = (-5,5), + n_samples = 51, + mode= "greater" +) -> tuple(pd.DataFrame, float, float): """ Calculates survival fraction for gamma lines using fitting method as in cut determination """ if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) + dt_mask = np.full(len(cut_param), True, dtype=bool) - min_range = peak - fit_width[0] - max_range = peak + fit_width[1] - if peak == "1592.5": - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - peak_dt_mask = dt_mask[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - else: - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - peak_dt_mask = dt_mask[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - # if len(peak_aoe)>50000: - # rng = np.random.default_rng(10) - # rands = rng.choice(len(peak_aoe),50000,replace=False) - # rands.sort() - # peak_energy = peak_energy[rands] - # peak_aoe = peak_aoe[rands] - # peak_dt_mask = peak_dt_mask[rands] - - pars, errors = unbinned_energy_fit(peak_energy, peak, eres_pars, simplex=False) - pc_n = pars["n_sig"] - pc_err = errors["n_sig"] - sfs = [] - sf_errs = [] - - cut_vals = np.arange(-5, 5, 0.2) - final_cut_vals = [] + cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) + out_df = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) for cut_val in cut_vals: try: sf, err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, peak_aoe, cut_val, peak, eres_pars, dt_mask=peak_dt_mask + energy, cut_param, cut_val, peak, eres_pars, dt_mask=dt_mask, mode=mode ) - if np.isnan(cut_pars).all() == False and np.isnan(surv_pars).all() == False: - guess_pars_cut = cut_pars - guess_pars_surv = surv_pars + out_df = pd.concat([out_df, + pd.DataFrame([{"cut_val":cut_val, + "sf":sf, + "sf_err":err}])]) except: - sf = np.nan - err = np.nan - sfs.append(sf) - sf_errs.append(err) - final_cut_vals.append(cut_val) - ids = ( - (sf_errs < (5 * np.nanpercentile(sf_errs, 50))) - & (~np.isnan(sf_errs)) - & (np.array(sfs) < 100) - ) + pass + out_df.set_index("cut_val", inplace=True) sf, sf_err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, peak_aoe, aoe_cut_val, peak, eres_pars, dt_mask=peak_dt_mask + energy, cut_param, final_cut_value, peak, eres_pars, dt_mask=dt_mask, mode=mode ) - - if display > 0: - plt.figure() - plt.errorbar(cut_vals, sfs, sf_errs) - plt.show() - return ( - np.array(final_cut_vals)[ids], - np.array(sfs)[ids], - np.array(sf_errs)[ids], + out_df.query(f'sf_err<5*{np.nanpercentile(out_df["sf_err"], 50)}& sf_err==sf_err & sf<=100'), sf, sf_err, - ) + ) +def compton_sf(cut_param, low_cut_val, high_cut_val = None, mode="greater", dt_mask=None): + + if dt_mask is None: + dt_mask = np.full(len(cut_param), True, dtype=bool) + + if high_cut_val is not None: + mask = (cut_param > low_cut_val) & (cut_param < high_cut_val) & dt_mask + else: + if mode == "greater": + mask = (cut_param > low_cut_val) & dt_mask + elif mode == "less": + mask = (cut_param < low_cut_val) & dt_mask + else: + raise ValueError("mode not recognised") + + sf = 100 * len(cut_param[mask]) / len(cut_param) + sf_err = sf* np.sqrt( + (1 / len(cut_param)) + 1 / 
(len(cut_param[mask]) + 10**-99) + ) + return {"low_cut":low_cut_val, "sf":sf, "sf_err":sf_err, "high_cut": high_cut_val} -def compton_sf( +def compton_sf_sweep( energy: np.array, - aoe: np.array, - cut: float, + cut_param: np.array, + final_cut_value: float, peak: float, eres: list[float, float], dt_mask: np.array = None, - display: int = 1, + cut_range = (-5,5), + n_samples = 51, + mode= "greater" ) -> tuple(float, np.array, list): """ Determines survival fraction for compton continuum by basic counting """ - fwhm = np.sqrt(eres[0] + peak * eres[1]) - - emin = peak - 2 * fwhm - emax = peak + 2 * fwhm - sfs = [] - sf_errs = [] - ids = (energy > emin) & (energy < emax) & (~np.isnan(aoe)) - aoe = aoe[ids] - if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) - else: - dt_mask = dt_mask[ids] - cut_vals = np.arange(-5, 5, 0.1) + cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) + out_df = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) + for cut_val in cut_vals: - sfs.append(100 * len(aoe[(aoe > cut_val) & dt_mask]) / len(aoe)) - sf_errs.append( - sfs[-1] - * np.sqrt( - (1 / len(aoe)) + 1 / (len(aoe[(aoe > cut_val) & dt_mask]) + 10**-99) + ct_dict = compton_sf(cut_param, cut_val, mode=mode, dt_mask=dt_mask) + df = pd.DataFrame([{"cut_val":ct_dict["low_cut"], "sf":ct_dict["sf"], "sf_err":ct_dict["sf_err"]}]) + out_df = pd.concat([out_df, df]) + out_df.set_index("cut_val", inplace=True) + + sf_dict = compton_sf(cut_param, final_cut_value, mode=mode, dt_mask=dt_mask) + + return out_df, sf_dict["sf"], sf_dict["sf_err"] + +class cal_aoe: + + def __init__(self, + cal_dicts: dict, + cal_energy_param: str, + eres_func: callable, + pdf=standard_aoe, + selection_string: str = "is_valid_cal&is_not_pulser", + dt_corr: bool = False, + dep_acc:float = 0.9, + dep_correct: bool = False, + dt_cut:dict = None, + dt_param:str = "dt_eff", + high_cut_val: int = 3, + mean_func:Callable=pol1, + sigma_func:Callable=sigma_fit, + comptBands_width:int=20, + plot_options:dict={} + ): + self.cal_dicts = cal_dicts + self.cal_energy_param = cal_energy_param + self.eres_func = eres_func + self.pdf =pdf + self.selection_string = selection_string + self.dt_corr = dt_corr + self.dt_param = "dt_eff" + self.dep_correct= dep_correct + self.dt_cut = dt_cut + self.dep_acc = dep_acc + if self.dt_cut is not None: + self.update_cal_dicts(dt_cut["cut"]) + self.dt_cut_param = dt_cut["out_param"] + self.fit_selection = f"{self.selection_string} & {self.dt_cut_param}" + self.dt_cut_hard = dt_cut["hard"] + else: + self.dt_cut_param = None + self.dt_cut_hard = False + self.fit_selection = self.selection_string + self.high_cut_val = high_cut_val + self.mean_func= mean_func + self.sigma_func=sigma_func + self.comptBands_width = comptBands_width + self.plot_options=plot_options + + def update_cal_dicts(self, update_dict): + if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]): + for tstamp in self.cal_dicts: + if tstamp in update_dict: + self.cal_dicts[tstamp].update(update_dict[tstamp]) + else: + self.cal_dicts[tstamp].update(update_dict) + else: + self.cal_dicts.update(update_dict) + + def aoe_timecorr( + self, + df, + aoe_param, + output_name = "AoE_Timecorr", + display=0 + ): + log.info("Starting A/E time correction") + self.timecorr_df = pd.DataFrame(columns=["timestamp", "mean", "mean_err", "res", "res_err"]) + try: + if "timestamp" in df: + tstamps = sorted(np.unique(df["timestamp"])) + means = [] + errors = [] + reses = [] + res_errs = [] + final_tstamps = [] + for tstamp, time_df in 
df.groupby("timestamp", sort=True): + try: + pars, errs, cov = unbinned_aoe_fit( + time_df.query( + f"{self.fit_selection} & ({self.cal_energy_param}>1000) & ({self.cal_energy_param}<1300)" + )[aoe_param], + pdf=self.pdf, + display=display, + ) + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + {"timestamp": tstamp, + "mean":pars["mu"], + "mean_err":errs["mu"], + "res":pars["sigma"] / pars["mu"], + "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} + ]), + ]) + except: + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + {"timestamp": tstamp, + "mean":np.nan, + "mean_err":np.nan, + "res":np.nan, + "res_err":np.nan} + ]), + ]) + self.timecorr_df.set_index("timestamp", inplace=True) + time_dict = fit_time_means(np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + np.array(self.timecorr_df["res"])) + + df[output_name] = df[aoe_param] / np.array( + [time_dict[tstamp] for tstamp in df["timestamp"]] + ) + self.update_cal_dicts({ + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } + } + for tstamp, t_dict in time_dict.items() + }) + log.info("A/E time correction finished") + else: + try: + pars, errs, cov = unbinned_aoe_fit( + df.query(f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300")[ + aoe_param + ], + pdf=self.pdf, + display=display, + ) + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + { + "mean":pars["mu"], + "mean_err":errs["mu"], + "res":pars["sigma"] / pars["mu"], + "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} + ]), + ]) + except: + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + { + "mean":np.nan, + "mean_err":np.nan, + "res":np.nan, + "res_err":np.nan} + ]), + ]) + df[output_name] = df[aoe_param] / pars["mu"] + self.update_cal_dicts({ + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": pars["mu"]}, + } + }) + log.info("A/E time correction finished") + except: + log.error("A/E time correction failed") + self.update_cal_dicts({ + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": np.nan}, + } + }) + + def drift_time_correction( + self, + data:pd.DataFrame, + aoe_param, + display: int = 0, + ): + """ + Calculates the correction needed to align the two drift time regions for ICPC detectors + """ + log.info("Starting A/E drift time correction") + self.dt_res_dict = {} + try: + dep_events = data.query(f"{self.fit_selection}&{self.cal_energy_param}>1582&{self.cal_energy_param}<1602&{self.cal_energy_param}=={self.cal_energy_param}&{aoe_param}=={aoe_param}") + + hist, bins, var = pgh.get_hist( + dep_events[aoe_param], + bins=500, ) - ) - sf = 100 * len(aoe[(aoe > cut) & dt_mask]) / len(aoe) - sf_err = sf * np.sqrt( - 1 / len(aoe) + 1 / (len(aoe[(aoe > cut) & dt_mask]) + 10**-99) - ) - return cut_vals, sfs, sf_errs, sf, sf_err + bin_cs = (bins[1:] + bins[:-1]) / 2 + mu = bin_cs[np.argmax(hist)] + aoe_range = [mu * 0.9, mu * 1.1] + dt_range = [np.nanpercentile(dep_events[self.dt_param], 1) , np.nanpercentile(dep_events[self.dt_param], 99)] -def get_sf_no_sweep( - energy: np.array, - aoe: np.array, - peak: float, - fit_width: tuple(int, int), - eres_pars: list, - aoe_low_cut_val: float, - aoe_high_cut_val: float = None, - dt_mask: np.array = None, - display: int = 1, -) -> tuple(float, float): - """ - Calculates survival fraction for gamma line 
without sweeping through values - """ + self.dt_res_dict['final_selection'] = f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}&{self.dt_param}>{dt_range[0]}&{self.dt_param}<{dt_range[1]}&{self.dt_param}=={self.dt_param}" - if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) + final_df = dep_events.query(self.dt_res_dict['final_selection']) - min_range = peak - fit_width[0] - max_range = peak + fit_width[1] - if peak == "1592.5": - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - else: - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - peak_dt_mask = dt_mask[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - # if len(peak_aoe)>50000: - # rng = np.random.default_rng(10) - # rands = rng.choice(len(peak_aoe),50000,replace=False) - # rands.sort() - # peak_energy = peak_energy[rands] - # peak_aoe = peak_aoe[rands] - # peak_dt_mask = peak_dt_mask[rands] + hist, bins, var = pgh.get_hist( + final_df[self.dt_param], dx=10, range=(np.nanmin(final_df[self.dt_param]), np.nanmax(final_df[self.dt_param])) + ) + + gpars = self.dt_res_dict['dt_guess'] =drift_time_distribution.guess(hist, bins, var) + cost_func = cost.ExtendedUnbinnedNLL(final_df[self.dt_param], drift_time_distribution.extended_pdf) + m = Minuit(cost_func, **gpars) + m.limits = drift_time_distribution.bounds(gpars) + m.fixed = drift_time_distribution.fixed() + m.simplex().migrad() + m.hesse() + + self.dt_res_dict["dt_fit"]={"pars": m.values,"errs":m.errors, "object":m} + aoe_grp1 = self.dt_res_dict["aoe_grp1"] = f'{self.dt_param}>{m.values["mu1"] - 2 * m.values["sigma1"]} & {self.dt_param}<{m.values["mu1"] + 2 * m.values["sigma1"]}' + aoe_grp2 = self.dt_res_dict["aoe_grp2"] = f'{self.dt_param}>{m.values["mu2"] - 2 * m.values["sigma2"]} & {self.dt_param}<{m.values["mu2"] + 2 * m.values["sigma2"]}' + + aoe_pars, aoe_errs, _ = unbinned_aoe_fit(final_df.query(aoe_grp1)[aoe_param], + pdf=self.pdf, display=display) - sf, sf_err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, - peak_aoe, - aoe_low_cut_val, - peak, - eres_pars, - high_cut=aoe_high_cut_val, - dt_mask=peak_dt_mask, - ) - return sf, sf_err + self.dt_res_dict["aoe_fit1"] = {"pars":aoe_pars, "errs": aoe_errs} + aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit(final_df.query(aoe_grp2)[aoe_param], + pdf=self.pdf, display=display) -def compton_sf_no_sweep( - energy: np.array, - aoe: np.array, - peak: float, - eres: list[float, float], - aoe_low_cut_val: float, - aoe_high_cut_val: float = None, - dt_mask: np.array = None, - display: int = 1, -) -> float: - """ - Calculates survival fraction for compton contiuum without sweeping through values - """ + self.dt_res_dict["aoe_fit2"] = {"pars":aoe_pars2, "errs": aoe_errs2} - fwhm = np.sqrt(eres[0] + peak * eres[1]) + try: + self.alpha = (aoe_pars["mu"] - aoe_pars2["mu"]) / ( + (m.values["mu2"] * aoe_pars2["mu"]) - (m.values["mu1"] * aoe_pars["mu"]) + ) + except ZeroDivisionError: + self.alpha = 0 + self.dt_res_dict["alpha"] = self.alpha + log.info(f"dtcorr successful alpha:{self.alpha}") + data["AoE_DTcorr"] = data[aoe_param] * (1 + self.alpha * data[self.dt_param]) + except: + log.error("Drift time correction failed") + self.alpha=np.nan - emin = peak - 2 * fwhm - emax = peak + 2 * fwhm - sfs = [] - ids = (energy > emin) & (energy < emax) & 
(~np.isnan(aoe)) - aoe = aoe[ids] - if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) - else: - dt_mask = dt_mask[ids] - if aoe_high_cut_val is None: - sf = 100 * len(aoe[(aoe > aoe_low_cut_val)]) / len(aoe) - sf_err = sf * np.sqrt( - 1 / len(aoe) + 1 / len(aoe[(aoe > aoe_low_cut_val) & dt_mask]) - ) - else: - sf = ( - 100 - * len(aoe[(aoe > aoe_low_cut_val) & (aoe < aoe_high_cut_val) & dt_mask]) - / len(aoe) - ) - sf_err = sf * np.sqrt( - 1 / len(aoe) - + 1 / len(aoe[(aoe > aoe_low_cut_val) & (aoe < aoe_high_cut_val) & dt_mask]) - ) - return sf, sf_err + self.update_cal_dicts({ + "AoE_DTcorr": { + "expression": f"{aoe_param}*(1+a*{self.dt_param})", + "parameters": {"a": self.alpha}, + } + }) + def AoEcorrection( + self, + data:pd.DataFrame, + aoe_param:str, + display:int=0 + ): + """ + Calculates the corrections needed for the energy dependence of the A/E. + Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. + """ -def apply_dtcorr(aoe: np.array, dt: np.array, alpha: float) -> np.array: - """Aligns dt regions""" - return aoe * (1 + alpha * dt) + log.info("Starting A/E energy correction") + self.energy_corr_res_dict = {} + + comptBands = np.arange(900, 2350, self.comptBands_width) + peaks = np.array([1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105]) + allowed = np.array([], dtype=bool) + for i, band in enumerate(comptBands): + allow = True + for peak in peaks: + if (peak - 5) > band and (peak - 5) < (band + self.comptBands_width): + allow = False + elif (peak + 5 > band) and (peak + 5) < (band +self.comptBands_width): + allow = False + allowed = np.append(allowed, allow) + comptBands = comptBands[allowed] + + self.energy_corr_fits = pd.DataFrame(columns=["compt_bands", "mean", "mean_err", + "sigma", "sigma_err", "ratio", "ratio_err"], dtype=float) + try: + select_df = data.query(f"{self.fit_selection} & {aoe_param}>0") + # Fit each compton band + for band in comptBands: + try: + pars, errs, cov = unbinned_aoe_fit( + select_df.query(f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.comptBands_width+band}")[aoe_param], + pdf=self.pdf, display=display) -def drift_time_correction( - aoe: np.array, - energy: np.array, - dt: np.array, - pdf=standard_aoe, - display: int = 0, - plot_dict: dict = {}, -) -> tuple(np.array, float): - """ - Calculates the correction needed to align the two drift time regions for ICPC detectors - """ - hist, bins, var = pgh.get_hist( - aoe[(energy > 1582) & (energy < 1602) & (~np.isnan(energy)) & (~np.isnan(aoe))], - bins=500, - ) - bin_cs = (bins[1:] + bins[:-1]) / 2 - mu = bin_cs[np.argmax(hist)] - aoe_range = [mu * 0.9, mu * 1.1] - - idxs = ( - (energy > 1582) - & (energy < 1602) - & (aoe > aoe_range[0]) - & (aoe < aoe_range[1]) - & (dt > np.nanpercentile(dt, 1)) - & (dt < np.nanpercentile(dt, 99)) - & (~np.isnan(dt)) - & (~np.isnan(aoe)) - & (~np.isnan(energy)) - ) + mean, mean_err = self.pdf.centroid(pars, errs, cov) + sigma, sigma_err = self.pdf.width(pars, errs, cov) - hist, bins, var = pgh.get_hist( - dt[idxs], dx=10, range=(np.nanmin(dt[idxs]), np.nanmax(dt[idxs])) - ) - dt_distrib = drift_time_distribution() - - gpars = dt_distrib.guess(hist, bins, var) - cost_func = cost.ExtendedUnbinnedNLL(dt[idxs], dt_distrib.extended_pdf) - m = Minuit(cost_func, **gpars) - m.limits = dt_distrib.bounds(gpars) - m.fixed = dt_distrib.fixed() - m.simplex().migrad() - m.hesse() + self.energy_corr_fits = pd.concat( + [ + self.energy_corr_fits, + pd.DataFrame([ + 
{"compt_bands": band+self.comptBands_width/2, + "mean":mean, + "mean_err":mean_err, + "sigma":sigma, + "sigma_err":sigma_err, + "ratio":pars["n_sig"] / pars["n_bkg"], + "ratio_err":(pars["n_sig"] / pars["n_bkg"]) *np.sqrt( + (errs["n_sig"] / pars["n_sig"]) ** 2 + + (errs["n_bkg"] / pars["n_bkg"]) ** 2 + ) + }] + ), + ]) + + except: + self.energy_corr_fits = pd.concat( + [ + self.energy_corr_fits, + pd.DataFrame( + [{"compt_bands": band, + "mean":np.nan, + "mean_err":np.nan, + "sigma":np.nan, + "sigma_err":np.nan, + "ratio":np.nan, + "ratio_err":np.nan + }] + ), + ]) + self.energy_corr_fits.set_index("compt_bands", inplace=True) + valid_fits = self.energy_corr_fits.query("mean_err==mean_err&sigma_err==sigma_err & sigma_err!=0 & mean_err!=0") + self.energy_corr_res_dict["n_of_valid_fits"] = len(valid_fits) + log.info(f"{len(valid_fits)} compton bands fit successfully") + # Fit mus against energy + p0_mu = self.mean_func.guess(valid_fits.index, valid_fits["mean"], valid_fits["mean_err"]) + c_mu = cost.LeastSquares( + valid_fits.index, valid_fits["mean"], valid_fits["mean_err"], self.mean_func.func + ) + c_mu.loss = "soft_l1" + m_mu = Minuit(c_mu, *p0_mu) + m_mu.simplex() + m_mu.migrad() + m_mu.hesse() - aoe_mask = ( - (idxs) - & (dt > m.values["mu1"] - 2 * m.values["sigma1"]) - & (dt < m.values["mu1"] + 2 * m.values["sigma1"]) - ) - aoe_pars, aoe_errs, _ = unbinned_aoe_fit(aoe[aoe_mask], pdf=pdf, display=display) + mu_pars = m_mu.values + mu_errs = m_mu.errors - aoe_mask2 = ( - (idxs) - & (dt > m.values["mu2"] - 2 * m.values["sigma2"]) - & (dt < m.values["mu2"] + 2 * m.values["sigma2"]) - ) + csqr_mu = np.sum( + ((valid_fits["mean"] - self.mean_func.func(valid_fits.index, *mu_pars)) ** 2) / valid_fits["mean_err"] + ) + dof_mu = len(valid_fits["mean"]) - len(pars) + p_val_mu = chi2.sf(csqr_mu, dof_mu) + self.mean_fit_obj = m_mu + + # Fit sigma against energy + p0_sig = self.sigma_func.guess(valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"]) + c_sig = cost.LeastSquares( + valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"], self.sigma_func.func + ) + c_sig.loss = "soft_l1" + m_sig = Minuit(c_sig, *p0_sig) + m_sig.simplex() + m_sig.migrad() + m_sig.hesse() + + sig_pars = m_sig.values + sig_errs = m_sig.errors + + csqr_sig = np.sum( + ((valid_fits["sigma"] - self.sigma_func.func(valid_fits.index, *sig_pars)) ** 2) + / valid_fits["sigma_err"] + ) + dof_sig = len(valid_fits["sigma"]) - len(sig_pars) + p_val_sig = chi2.sf(csqr_sig, dof_sig) + + self.sigma_fit_obj = m_sig + + # Get DEP fit + n_sigma = 4 + peak = 1592 + sigma = self.eres_func(peak) / 2.355 + emin = peak - n_sigma * sigma + emax = peak + n_sigma * sigma + try: + dep_pars, dep_err, _ = unbinned_aoe_fit( + select_df.query(f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}")[aoe_param], + pdf=self.pdf, + display=display + ) + except: + dep_pars, dep_err, _ = return_nans(self.pdf) + + data["AoE_Corrected"] = data[aoe_param] / self.mean_func.func(data[self.cal_energy_param], *mu_pars) + data["AoE_Classifier"] = (data["AoE_Corrected"] - 1) / self.sigma_func.func( + data[self.cal_energy_param], *sig_pars + ) + log.info("Finished A/E energy successful") + log.info(f"mean pars are {mu_pars.to_dict()}") + log.info(f"sigma pars are {sig_pars.to_dict()}") + except: + log.error("A/E energy correction failed") + mu_pars, mu_errs, mu_cov = return_nans(self.mean_func.func) + csqr_mu, dof_mu, p_val_mu = (np.nan, np.nan, np.nan) + csqr_sig, dof_sig, p_val_sig = (np.nan, np.nan, np.nan) + sig_pars, sig_errs, sig_cov = 
return_nans(self.sigma_func.func) + dep_pars, dep_err, dep_cov = return_nans(self.pdf) + + self.energy_corr_res_dict["mean_fits"] = {"func": self.mean_func.__name__, + "module": self.mean_func.__module__, + "expression":self.mean_func.string_func("x"), + "pars": mu_pars.to_dict(), + "errs": mu_errs.to_dict(), + "p_val_mu": p_val_mu, + "csqr_mu": (csqr_mu, dof_mu)} + + self.energy_corr_res_dict["sigma_fits"] = {"func": self.sigma_func.__name__, + "module": self.sigma_func.__module__, + "expression":self.sigma_func.string_func("x"), + "pars": sig_pars.to_dict(), + "errs": sig_errs.to_dict(), + "p_val_mu": p_val_sig, + "csqr_mu": (csqr_sig, dof_sig)} + + self.energy_corr_res_dict["dep_fit"]={"func": self.pdf.__name__, + "module": self.pdf.__module__, + "pars": dep_pars.to_dict(), + "errs": dep_err.to_dict()} + + self.update_cal_dicts({ + "AoE_Corrected": { + "expression": f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", + "parameters": mu_pars.to_dict(), + }, + "AoE_Classifier": { + "expression": f"AoE_Corrected/({self.sigma_func.string_func(self.cal_energy_param)})", + "parameters": sig_pars.to_dict(), + } + }) + + def get_aoe_cut_fit( + self, + data:pd.DataFrame, + aoe_param:str, + peak: float, + ranges: tuple, + dep_acc: float, + display: int = 1 + ): + """ + Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. + Then interpolates to get cut value at desired DEP survival fraction (typically 90%) + """ - aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit(aoe[aoe_mask2], pdf=pdf, display=display) + log.info("Starting A/E low cut determination") + self.low_cut_res_dict = {} + self.cut_fits = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) + min_range, max_range = ranges + + try: + select_df = data.query(f"{self.fit_selection}&({self.cal_energy_param} > {peak - min_range}) & ({self.cal_energy_param} < {peak + max_range})") + + # if dep_correct is True: + # peak_aoe = (select_df[aoe_param] / dep_mu(select_df[self.cal_energy_param])) - 1 + # peak_aoe = select_df[aoe_param] / sig_func(select_df[self.cal_energy_param]) + + cut_vals = np.arange(-8, 0, 0.2) + sfs = [] + sf_errs = [] + for cut_val in cut_vals: + sf, err, cut_pars, surv_pars = get_survival_fraction( + select_df[self.cal_energy_param].to_numpy(), + select_df[aoe_param].to_numpy(), + cut_val, + peak, + self.eres_func(peak), + guess_pars_cut=None, + guess_pars_surv=None + ) + self.cut_fits = pd.concat( + [ + self.cut_fits, + pd.DataFrame( + [{"cut_val": cut_val, + "sf":sf, + "sf_err":err, + }] + ), + ]) + self.cut_fits.set_index("cut_val", inplace=True) + valid_fits = self.cut_fits.query(f'sf_err<{(1.5 * np.nanpercentile(self.cut_fits["sf_err"],85))}&sf_err==sf_err') + + c = cost.LeastSquares( + valid_fits.index, valid_fits["sf"], valid_fits["sf_err"], sigmoid_fit.func + ) + c.loss = "soft_l1" + m1 = Minuit(c, *sigmoid_fit.guess(valid_fits.index, valid_fits["sf"], valid_fits["sf_err"])) + m1.simplex().migrad() + xs = np.arange(np.nanmin(valid_fits.index), np.nanmax(valid_fits.index), 0.01) + p = sigmoid_fit.func(xs, *m1.values) + self.cut_fit = {"function": sigmoid_fit.__name__ , "pars": m1.values.to_dict(), "errs": m1.errors.to_dict()} + self.low_cut_val = round(xs[np.argmin(np.abs(p - (100 * self.dep_acc)))], 3) + log.info(f"Cut found at {self.low_cut_val}") + + data["AoE_Low_Cut"] = (data[aoe_param]>self.low_cut_val) + if self.dt_cut_param is not None: + data["AoE_Low_Cut"] = data["AoE_Low_Cut"] &(data[self.dt_cut_param]) + data["AoE_Double_Sided_Cut"] = 
data["AoE_Low_Cut"] & (data[aoe_param]a) & ({self.dt_cut_param})", + "parameters": {"a": self.low_cut_val}, + }}) + else: + self.update_cal_dicts({ + "AoE_Low_Cut": { + "expression": f"({aoe_param}>a)", + "parameters": {"a": self.low_cut_val}, + }}) + self.update_cal_dicts({"AoE_Double_Sided_Cut": { + "expression": f"(a>{aoe_param}) & (AoE_Low_Cut)", + "parameters": {"a": self.high_cut_val}, + }}) + + def get_results_dict(self): + return { + "cal_energy_param": self.cal_energy_param, + "dt_param": self.dt_param, + "rt_correction": self.dt_corr, + "pdf":self.pdf.__name__, + "1000-1300keV": self.timecorr_df.to_dict("index"), + "correction_fit_results": self.energy_corr_res_dict, + "low_cut": self.low_cut_val, + "high_cut": self.high_cut_val, + "low_side_sfs": self.low_side_sf.to_dict("index"), + "2_side_sfs": self.two_side_sf.to_dict("index"), + } + + def fill_plot_dict(self, data, plot_dict={}): + for key, item in self.plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](self, data, **item["options"]) + else: + plot_dict[key] = item["function"](self, data) + return plot_dict + + + def calibrate(self, df, initial_aoe_param): + self.aoe_timecorr( + df, initial_aoe_param + ) + log.info("Finished A/E time correction") + + if self.dt_corr == True: + aoe_param = "AoE_DTcorr" + self.drift_time_correction( + df, + "AoE_Timecorr" + ) + else: + aoe_param = "AoE_Timecorr" + + self.AoEcorrection( + df, + aoe_param + ) + + + self.get_aoe_cut_fit( + df, + "AoE_Classifier", + 1592, + (40, 20), + 0.9 + ) + + aoe_param = "AoE_Classifier" + log.info(" Compute low side survival fractions: ") + self.low_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] + fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] + self.low_side_peak_dfs={} + + + for i, peak in enumerate(peaks_of_interest): + try: + select_df = df.query(f"{self.selection_string}&{aoe_param}=={aoe_param}") + fwhm = self.eres_func(peak) + if peak == 2039: + emin = 2 * fwhm + emax = 2 * fwhm + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + + cut_df, sf, sf_err = compton_sf_sweep( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + ) + self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + self.low_side_peak_dfs[peak]=cut_df + else: + emin,emax = fit_widths[i] + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + cut_df, sf, sf_err = get_sf_sweep( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + ) + self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + self.low_side_peak_dfs[peak]=cut_df + log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") + except: + self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) + log.error(f"A/E Low side Survival fraction determination failed for {peak} peak") + self.low_side_sf.set_index("peak", inplace=True) + + + self.two_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + log.info("Calculating 2 sided cut sfs") + for 
i, peak in enumerate(peaks_of_interest): + fwhm = self.eres_func(peak) + try: + if peak == 2039: + emin = 2 * fwhm + emax = 2 * fwhm + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + + sf_dict = compton_sf(peak_df[aoe_param].to_numpy(), + self.low_cut_val, + self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) + sf = sf_dict["sf"] + sf_err = sf_dict["sf_err"] + self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, + "sf":sf, + "sf_err":sf_err}])]) + else: + emin, emax = fit_widths[i] + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + sf, sf_err, _, _ = get_survival_fraction( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + high_cut=self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) + self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") + + except: + self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) + log.error(f"A/E two side Survival fraction determination failed for {peak} peak") + self.two_side_sf.set_index("peak", inplace=True) + +def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots(1, 1) try: - alpha = (aoe_pars["mu"] - aoe_pars2["mu"]) / ( - (m.values["mu2"] * aoe_pars2["mu"]) - (m.values["mu1"] * aoe_pars["mu"]) + ax.errorbar( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + aoe_class.timecorr_df["mean"], + yerr=aoe_class.timecorr_df["mean_err"], + linestyle=" ", ) - except ZeroDivisionError: - alpha = 0 - aoe_corrected = apply_dtcorr(aoe, dt, alpha) - if display > 0: - aoe_pdf = pdf() - dt_fig = plt.figure() + grouped_means = [cal_dict[time_param]["parameters"]["a"] for tstamp, cal_dict in aoe_class.cal_dicts.items()] + ax.step( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.cal_dicts + ], + grouped_means, + where="post", + ) + ax.fill_between( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.cal_dicts + ], + y1=np.array(grouped_means) + - 0.2 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + + 0.2 * np.array(aoe_class.timecorr_df["res"]), + color="green", + alpha=0.2, + ) + ax.fill_between( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.cal_dicts + ], + y1=np.array(grouped_means) + - 0.4 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + + 0.4 * np.array(aoe_class.timecorr_df["res"]), + color="yellow", + alpha=0.2, + ) + except:pass + ax.set_xlabel("time") + ax.set_ylabel("A/E mean") + myFmt = mdates.DateFormatter("%b %d") + ax.xaxis.set_major_formatter(myFmt) + plt.close() + return fig + +def plot_aoe_res_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots(1, 1) + try: + ax.errorbar( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + aoe_class.timecorr_df["res"], + 
yerr=aoe_class.timecorr_df["res_err"], + linestyle=" ", + ) + except:pass + ax.set_xlabel("time") + ax.set_ylabel("A/E res") + myFmt = mdates.DateFormatter("%b %d") + ax.xaxis.set_major_formatter(myFmt) + plt.close() + return fig + +def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_corr="AoE_DTcorr", + figsize=[12, 8], fontsize=12): + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + + dep_events = data.query(f"{aoe_class.fit_selection}&{aoe_class.cal_energy_param}>1582&{aoe_class.cal_energy_param}<1602&{aoe_class.cal_energy_param}=={aoe_class.cal_energy_param}&{aoe_param}=={aoe_param}") + final_df = dep_events.query(aoe_class.dt_res_dict['final_selection']) + + plt.subplot(2, 2, 1) - xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 1000) + aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["pars"] + + xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 100) counts, aoe_bins, bars = plt.hist( - aoe[ - (aoe < aoe_pars["upper_range"]) - & (aoe > aoe_pars["lower_range"]) - & aoe_mask - ], + final_df.query(f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}')[aoe_param], bins=400, histtype="step", - label="Data", + label="data", ) dx = np.diff(aoe_bins) - plt.plot(xs, aoe_pdf.pdf(xs, *aoe_pars) * dx[0], label="Full fit") - sig, bkg = aoe_pdf.pdf(xs, *aoe_pars[:-1], True) - plt.plot(xs, sig * dx[0], label="Peak fit") - plt.plot(xs, bkg * dx[0], label="Bkg fit") + plt.plot(xs, aoe_class.pdf.pdf(xs, *aoe_pars) * dx[0], label="full fit") + sig, bkg = aoe_class.pdf.pdf(xs, *aoe_pars[:-1], True) + plt.plot(xs, sig * dx[0], label="peak fit") + plt.plot(xs, bkg * dx[0], label="bkg fit") plt.legend(loc="upper left") plt.xlabel("A/E") - plt.ylabel("Counts") - + plt.ylabel("counts") + + aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["pars"] plt.subplot(2, 2, 2) - xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 1000) + xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 100) counts, aoe_bins2, bars = plt.hist( - aoe[ - (aoe < aoe_pars2["upper_range"]) - & (aoe > aoe_pars2["lower_range"]) - & aoe_mask2 - ], + final_df.query(f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}')[aoe_param], bins=400, histtype="step", label="Data", ) dx = np.diff(aoe_bins2) - plt.plot(xs, aoe_pdf.pdf(xs, *aoe_pars2) * dx[0], label="Full fit") - sig, bkg = aoe_pdf.pdf(xs, *aoe_pars2[:-1], True) - plt.plot(xs, sig * dx[0], label="Peak fit") - plt.plot(xs, bkg * dx[0], label="Bkg fit") + plt.plot(xs, aoe_class.pdf.pdf(xs, *aoe_pars2) * dx[0], label="full fit") + sig, bkg = aoe_class.pdf.pdf(xs, *aoe_pars2[:-1], True) + plt.plot(xs, sig * dx[0], label="peak fit") + plt.plot(xs, bkg * dx[0], label="bkg fit") plt.legend(loc="upper left") plt.xlabel("A/E") - plt.ylabel("Counts") - + plt.ylabel("counts") + + hist, bins, var = pgh.get_hist( + final_df[aoe_class.dt_param], dx=10, range=(np.nanmin(final_df[aoe_class.dt_param]), + np.nanmax(final_df[aoe_class.dt_param])) + ) + plt.subplot(2, 2, 3) - plt.step(pgh.get_bin_centers(bins), hist, label="Data") + plt.step(pgh.get_bin_centers(bins), hist, label="data") plt.plot( pgh.get_bin_centers(bins), - dt_distrib.pdf(pgh.get_bin_centers(bins), **gpars) * np.diff(bins)[0], + drift_time_distribution.pdf(pgh.get_bin_centers(bins), + **aoe_class.dt_res_dict['dt_guess']) * np.diff(bins)[0], label="Guess", ) plt.plot( 
pgh.get_bin_centers(bins), - dt_distrib.pdf(pgh.get_bin_centers(bins), *m.values) * np.diff(bins)[0], - label="Fit", + drift_time_distribution.pdf(pgh.get_bin_centers(bins), + *aoe_class.dt_res_dict["dt_fit"]["pars"]) * np.diff(bins)[0], + label="fit", ) - plt.xlabel("Drift Time (ns)") + plt.xlabel("drift time (ns)") plt.ylabel("Counts") plt.legend(loc="upper left") plt.subplot(2, 2, 4) bins = np.linspace( - np.nanpercentile(aoe[idxs], 1), - np.nanpercentile(aoe_corrected[idxs], 99), + np.nanpercentile(final_df[aoe_param], 1), + np.nanpercentile(final_df[aoe_param_corr], 99), 200, ) - plt.hist(aoe[idxs], bins=bins, histtype="step", label="Uncorrected") - plt.hist(aoe_corrected[idxs], bins=bins, histtype="step", label="Corrected") + plt.hist(final_df[aoe_param], bins=bins, histtype="step", label="uncorrected") + plt.hist(final_df[aoe_param_corr], bins=bins, histtype="step", label="corrected") plt.xlabel("A/E") - plt.ylabel("Counts") + plt.ylabel("counts") plt.legend(loc="upper left") plt.tight_layout() plt.xlim( - np.nanpercentile(aoe[idxs], 1), np.nanpercentile(aoe_corrected[idxs], 99) + bins[0], bins[-1] ) - - plot_dict["dt_corr"] = dt_fig - if display > 1: - plt.show() - else: - plt.close() - return alpha, plot_dict - else: - return alpha - - -def cal_aoe( - files: list, - lh5_path, - cal_dict: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_pars: list, - pdf=standard_aoe, - cut_field: str = "is_valid_cal", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict = None, - aoe_high_cut: int = 4, - sigma_func=sigma_fit, - display: int = 0, -) -> tuple(dict, dict): + except:pass + plt.close() + return fig + +def plot_compt_bands_overlayed(aoe_class, + data, + eranges: list[tuple], + aoe_param = "AoE_Timecorr", + aoe_range: list[float] = None, + title= "Compton Bands", + density=True, + n_bins=50, + figsize=[12, 8], fontsize=12 + ) -> None: """ - Main function for running the a/e correction and cut determination. - - dt_cut: dictionary should contain two fields "cut" containing a dictionary of the form required by the hit_config and - hard specifying whether this is a hard cut so these events should be removed (e.g. tail to high A/E) or soft cut - where these events are just not used for the A/E fits and cut determination (e.g. 
tail to low A/E) + Function to plot various compton bands to check energy dependence and corrections """ - params = [ - current_param, - "tp_0_est", - "tp_99", - "dt_eff", - energy_param, - cal_energy_param, - cut_field, - ] - if dt_cut is not None: - if re.match(r"(\d{8})T(\d{6})Z", list(cal_dict)[0]): - for tstamp in cal_dict: - cal_dict[tstamp].update(dt_cut["cut"]) - else: - cal_dict.update(dt_cut["cut"]) - dt_cut_field = list(dt_cut["cut"])[0] - params.append(dt_cut_field) - else: - dt_cut_field = None - df = load_aoe( - files, - lh5_path, - cal_dict, - params, - energy_param=energy_param, - current_param=current_param, - ) - if dt_cut is not None: - df["dt_cut"] = df[list(dt_cut["cut"])[0]] - else: - df["dt_cut"] = np.full(len(df), True, dtype=bool) + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + + for erange in eranges: + try: + select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_param}=={aoe_param}') + if aoe_range is not None: + select_df = select_df.query(f'{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}') + bins = np.linspace(aoe_range[0], aoe_range[1], n_bins) + else: + bins = np.linspace(0.85, 1.05, n_bins) + plt.hist( + select_df[aoe_param], + bins=bins, + histtype="step", + label=f"{erange[0]}-{erange[1]}", + density=density, + ) + except:pass + plt.ylabel("counts") + plt.xlabel(aoe_param) + plt.title(title) + plt.legend(loc="upper left") + plt.close() + return fig + +def plot_dt_dep(aoe_class, + data, + eranges: list[tuple], + titles:list=None, + aoe_param = "AoE_Timecorr", + bins=[200, 100], + dt_max = 2000, + figsize=[12, 8], fontsize=12 + ) -> None: + """ + Function to produce 2d histograms of A/E against drift time to check dependencies + """ + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + for i,erange in enumerate(eranges): + try: + plt.subplot(3, 2, i+1) + select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_param}=={aoe_param}') - df["is_usable_fits"] = df[cut_field] & df["is_not_pulser"] & df["dt_cut"] + hist, bs, var = pgh.get_hist( + select_df[aoe_param], bins=500 + ) + bin_cs = (bs[1:] + bs[:-1]) / 2 + mu = bin_cs[np.argmax(hist)] + aoe_range = [mu * 0.9, mu * 1.1] + + + final_df = select_df.query(f'{aoe_param}<{aoe_range[1]}&{aoe_param}>{aoe_range[0]}&{aoe_class.dt_param}<{dt_max}') + plt.hist2d(final_df[aoe_param], final_df[aoe_class.dt_param], + bins=bins, norm=LogNorm()) + plt.ylabel("drift time (ns)") + plt.xlabel("A/E") + if titles is None: + plt.title(f'{erange[0]}-{erange[1]}') + else: + plt.title(titles[i]) + except:pass + plt.tight_layout() + plt.close() + return fig + +def plot_mean_fit(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) try: - df, timecorr_dict, res_dict = aoe_timecorr( - df, energy_param, current_param, pdf=pdf + ax1.errorbar( + aoe_class.energy_corr_fits.index, + aoe_class.energy_corr_fits["mean"], + yerr=aoe_class.energy_corr_fits["mean_err"], + xerr=aoe_class.comptBands_width/2, + label="data", + linestyle=" ", ) - log.info("Finished A/E time correction") - except: - log.info("A/E time correction failed") - res_dict = {} - timecorr_dict = { - "AoE_Timecorr": { - "expression": 
f"({current_param}/{energy_param})/a", - "parameters": {"a": np.nan}, - } - } - if re.match(r"(\d{8})T(\d{6})Z", list(cal_dict)[0]): - for tstamp in cal_dict: - if tstamp in timecorr_dict: - cal_dict[tstamp].update(timecorr_dict[tstamp]) - else: - cal_dict[tstamp].update(timecorr_dict) - else: - cal_dict.update(timecorr_dict) + ax1.plot(aoe_class.energy_corr_fits.index, + aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), label="linear model") + ax1.errorbar( + 1592, + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['mu'], + label="DEP", + color="green", + linestyle=" ", + ) - if dt_corr == True: - aoe_param = "AoE_dtcorr" - try: - if np.isnan(df.query("is_usable_fits")["AoE_timecorr"]).all(): - raise ValueError - alpha = drift_time_correction( - df.query("is_usable_fits")["AoE_timecorr"], - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["dt_eff"], - pdf=pdf, - ) - df["AoE_dtcorr"] = apply_dtcorr(df["AoE_timecorr"], df["dt_eff"], alpha) - log.info(f"dtcorr successful alpha:{alpha}") - except: - log.error("A/E dtcorr failed") - alpha = np.nan - else: - aoe_param = "AoE_timecorr" + ax1.legend(title="A/E mu energy dependence", frameon=False) - try: - log.info("Starting A/E energy correction") - mu_pars, sigma_pars, results_dict, dep_pars = AoEcorrection( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")[aoe_param], - eres_pars, - pdf=pdf, - sigma_func=sigma_func, + ax1.set_ylabel("raw A/E (a.u.)", ha="right", y=1) + ax2.scatter( + aoe_class.energy_corr_fits.index, + 100 * (aoe_class.energy_corr_fits["mean"] - aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + lw=1, + c="b", ) - dep_mu = dep_pars["mu"] - log.info("Finished A/E energy correction") - df["AoE_corrected"] = df[aoe_param] / pol1(df[cal_energy_param], *mu_pars) - df["AoE_classifier"] = (df["AoE_corrected"] - 1) / sigma_func( - df[cal_energy_param], *sigma_pars + ax2.scatter( + 1592, + 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'] - aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + lw=1, + c="g", ) - except: - log.error("A/E energy correction failed") - args = pol1.__code__.co_varnames[: pol1.__code__.co_argcount][1:] - c = cost.UnbinnedNLL(np.array([0]), pol1) - m = Minuit(c, *[np.nan for arg in args]) - mu_pars = m.values - args = sigma_func.__code__.co_varnames[: sigma_func.__code__.co_argcount][1:] - c = cost.UnbinnedNLL(np.array([0]), sigma_func) - m = Minuit(c, *[np.nan for arg in args]) - sigma_pars = m.values - dep_mu = np.nan - results_dict = {} - + except:pass + ax2.set_ylabel("residuals %", ha="right", y=1) + ax2.set_xlabel("energy (keV)", ha="right", x=1) + plt.tight_layout() + plt.close() + return fig + +def plot_sigma_fit(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) try: - if dep_correct is True: - cut = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_corrected"], - 1592, - (40, 20), - 0.9, - eres_pars, - 
dep_correct=True, - dep_mu=lambda x: dep_mu / pol1(1592.5, *mu_pars), - sig_func=lambda x: sigma_func(x, *sig_pars), - display=0, - ) + ax1.errorbar( + aoe_class.energy_corr_fits.index, + aoe_class.energy_corr_fits["sigma"], + yerr=aoe_class.energy_corr_fits["sigma_err"], + xerr=aoe_class.comptBands_width/2, + label="data", + linestyle=" ", + ) + sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["pars"] + if aoe_class.sigma_func == sigma_fit: + label = f'sqrt model: \nsqrt({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.1f}/E)^{sig_pars["c"]:1.1f})' + elif aoe_class.sigma_func == sigma_fit_quadratic: + label = f'quad model: \n({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.6f}*E)+\n({sig_pars["c"]:1.6f}*E)^2)' else: - cut = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_classifier"], - 1592, - (40, 20), - 0.9, - eres_pars, - display=0, - ) - - log.info(f"Cut found at {cut}") - except: - log.error("A/E cut determination failed") - cut = np.nan - - aoe_cal_dict = {} - if dt_corr == False: - aoe_uncorr_param = "AoE_Timecorr" - else: - aoe_cal_dict.update( - { - "AoE_DTcorr": { - "expression": f"AoE_Timecorr*(1+a*dt_eff)", - "parameters": {"a": alpha}, - } - } + raise ValueError("unknown sigma function") + ax1.plot( + aoe_class.energy_corr_fits.index, + aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index,**sig_pars), + label=label, ) - aoe_uncorr_param = "AoE_DTcorr" - - aoe_cal_dict.update( - { - "AoE_Corrected": { - "expression": f"((({aoe_uncorr_param})/(a*{cal_energy_param} +b))-1)", - "parameters": mu_pars.to_dict(), - } - } - ) - if sigma_func == sigma_fit: - aoe_cal_dict.update( - { - "AoE_Classifier": { - "expression": f"AoE_Corrected/(sqrt(a+(b/{cal_energy_param})**c))", - "parameters": sigma_pars.to_dict(), - } - } + ax1.errorbar( + 1592, + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['sigma'], + label="DEP", + color="green", + linestyle=" ", ) - else: - raise ValueError("Unknown sigma func") - - if dt_cut is not None: - if dt_cut["hard"] is True: - aoe_cal_dict.update( - { - "AoE_Low_Cut": { - "expression": f"(AoE_Classifier>a) & ({list(dt_cut['cut'])[0]})", - "parameters": {"a": cut}, - }, - "AoE_Double_Sided_Cut": { - "expression": "(a>AoE_Classifier) & (AoE_Low_Cut)", - "parameters": {"a": aoe_high_cut}, - }, - } - ) - else: - aoe_cal_dict.update( - { - "AoE_Low_Cut": { - "expression": "AoE_Classifier>a", - "parameters": {"a": cut}, - }, - "AoE_Double_Sided_Cut": { - "expression": "(a>AoE_Classifier) & (AoE_Low_Cut)", - "parameters": {"a": aoe_high_cut}, - }, - } - ) - else: - aoe_cal_dict.update( - { - "AoE_Low_Cut": { - "expression": "AoE_Classifier>a", - "parameters": {"a": cut}, - }, - "AoE_Double_Sided_Cut": { - "expression": "(a>AoE_Classifier)&(AoE_Low_Cut)", - "parameters": {"a": aoe_high_cut}, - }, - } + ax1.set_ylabel("A/E stdev (a.u.)", ha="right", y=1) + ax1.legend(title="A/E stdev energy dependence", frameon=False) + ax2.scatter( + aoe_class.energy_corr_fits.index, + 100 * (aoe_class.energy_corr_fits["sigma"] - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars)) / aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), + lw=1, + c="b", ) - - if re.match(r"(\d{8})T(\d{6})Z", list(cal_dict)[0]): - for tstamp in cal_dict: - cal_dict[tstamp].update(aoe_cal_dict) - else: - cal_dict.update(aoe_cal_dict) - + ax2.scatter( + 1592, + 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'] - 
aoe_class.sigma_func.func(1592, **sig_pars)) / aoe_class.sigma_func.func(1592, **sig_pars), + lw=1, + c="g", + ) + except:pass + ax2.set_ylabel("residuals", ha="right", y=1) + ax2.set_xlabel("energy (keV)", ha="right", x=1) + plt.tight_layout() + plt.close() + return fig + +def plot_cut_fit(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: - log.info(" Compute low side survival fractions: ") - - peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] - sf = np.zeros(len(peaks_of_interest)) - sferr = np.zeros(len(peaks_of_interest)) - fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] - full_sfs = [] - full_sf_errs = [] - full_cut_vals = [] - - for i, peak in enumerate(peaks_of_interest): - if peak == 2039: - cut_vals, sfs, sf_errs, sf[i], sferr[i] = compton_sf( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - cut, - peak, - eres_pars, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - - full_cut_vals.append(cut_vals) - full_sfs.append(sfs) - full_sf_errs.append(sf_errs) - else: - cut_vals, sfs, sf_errs, sf[i], sferr[i] = get_sf( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - peak, - fit_widths[i], - cut, - eres_pars, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - full_cut_vals.append(cut_vals) - full_sfs.append(sfs) - full_sf_errs.append(sf_errs) - - log.info(f"{peak}keV: {sf[i]:2.1f} +/- {sferr[i]:2.1f} %") - - sf_2side = np.zeros(len(peaks_of_interest)) - sferr_2side = np.zeros(len(peaks_of_interest)) - log.info("Calculating 2 sided cut sfs") - for i, peak in enumerate(peaks_of_interest): - if peak == 2039: - sf_2side[i], sferr_2side[i] = compton_sf_no_sweep( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - peak, - eres_pars, - cut, - aoe_high_cut_val=aoe_high_cut, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - else: - sf_2side[i], sferr_2side[i] = get_sf_no_sweep( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - peak, - fit_widths[i], - eres_pars, - cut, - aoe_high_cut_val=aoe_high_cut, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - - log.info(f"{peak}keV: {sf_2side[i]:2.1f} +/- {sferr_2side[i]:2.1f} %") + plt.errorbar( + aoe_class.cut_fits.index, + aoe_class.cut_fits["sf"], + yerr=aoe_class.cut_fits["sf_err"], + linestyle=" ", + ) - def convert_sfs_to_dict(peaks_of_interest, sfs, sf_errs): - out_dict = {} - for i, peak in enumerate(peaks_of_interest): - out_dict[str(peak)] = { - "sf": f"{sfs[i]:2f}", - "sf_err": f"{sf_errs[i]:2f}", - } - return out_dict - - out_dict = { - "correction_fit_results": results_dict, - "A/E_Energy_param": energy_param, - "Cal_energy_param": cal_energy_param, - "dt_param": "dt_eff", - "rt_correction": dt_corr, - "1000-1300keV": res_dict, - "Mean_pars": list(mu_pars), - "Sigma_pars": list(sigma_pars), - "Low_cut": cut, - "High_cut": aoe_high_cut, - "Low_side_sfs": convert_sfs_to_dict(peaks_of_interest, sf, sferr), - "2_side_sfs": 
convert_sfs_to_dict(peaks_of_interest, sf_2side, sferr_2side), - } - log.info("Done") - log.info(f"Results are {out_dict}") - - except: - log.error("A/E Survival fraction determination failed") - out_dict = { - "correction_fit_results": results_dict, - "A/E_Energy_param": energy_param, - "Cal_energy_param": cal_energy_param, - "dt_param": "dt_eff", - "rt_correction": False, - "1000-1300keV_mean": res_dict, - "Mean_pars": list(mu_pars), - "Sigma_pars": list(sigma_pars), - "Low_cut": cut, - "High_cut": aoe_high_cut, - } - if display <= 0: - return cal_dict, out_dict - else: - plot_dict = {} - try: - plt.rcParams["figure.figsize"] = (12, 8) - plt.rcParams["font.size"] = 16 - - fig1 = plt.figure() - plt.subplot(3, 2, 1) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1582, 1602], - f"Tl DEP", - ) - plt.subplot(3, 2, 2) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1510, 1630], - f"Bi FEP", - ) - plt.subplot(3, 2, 3) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2030, 2050], - "Qbb", - ) - plt.subplot(3, 2, 4) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2080, 2120], - f"Tl SEP", - ) - plt.subplot(3, 2, 5) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2584, 2638], - f"Tl FEP", - ) - plt.tight_layout() - plot_dict["dt_deps"] = fig1 - if display > 1: - plt.show() - else: - plt.close() - - if dt_corr == True: - alpha, plot_dict = drift_time_correction( - df.query("is_usable_fits")["AoE_timecorr"], - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["dt_eff"], - display=display, - plot_dict=plot_dict, - ) + plt.plot(aoe_class.cut_fits.index, sigmoid_fit.func(aoe_class.cut_fits.index.to_numpy(), + **aoe_class.cut_fit["pars"])) + plt.hlines((100 * aoe_class.dep_acc), -8.1, aoe_class.low_cut_val, color="red", linestyle="--") + plt.vlines( + aoe_class.low_cut_val, + np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, + (100 * aoe_class.dep_acc), + color="red", + linestyle="--", + ) + plt.xlim([-8.1, 0.1]) + vals, labels = plt.yticks() + plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.ylim([np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, 102]) + except:pass + plt.xlabel("cut value") + plt.ylabel("survival percentage") + plt.close() + return fig + +def plot_survival_fraction_curves(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + plt.vlines(aoe_class.low_cut_val, 0, 100, label=f"cut value: {aoe_class.low_cut_val:1.2f}", color="black") + - fig_dt = plt.figure() - plt.subplot(3, 2, 1) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1582, 1602], - f"Tl DEP", - ) - plt.subplot(3, 2, 2) - plot_dt_dep( - 
df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1510, 1630], - f"Bi FEP", - ) - plt.subplot(3, 2, 3) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2030, 2050], - "Qbb", - ) - plt.subplot(3, 2, 4) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2080, 2120], - f"Tl SEP", - ) - plt.subplot(3, 2, 5) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2584, 2638], - f"Tl FEP", + for peak, survival_df in aoe_class.low_side_peak_dfs.items(): + try: + plt.errorbar( + survival_df.index, + survival_df["sf"], + yerr=survival_df["sf_err"], + label=f'{get_peak_label(peak)} {peak} keV: {aoe_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {aoe_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %' ) - plt.tight_layout() - plot_dict["dt_deps_dtcorr"] = fig_dt - if display > 1: - plt.show() - else: - plt.close() - - fig2 = plt.figure() - plot_compt_bands_overlayed( - df.query("is_valid_cal& is_not_pulser")[aoe_param], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - [950, 1250, 1460, 1660, 1860, 2060, 2270], - ) - plt.ylabel("Counts") - plt.xlabel("Raw A/E") - plt.title(f"Compton Bands before Correction") - plt.legend(loc="upper left") - plot_dict["compt_bands_nocorr"] = fig2 - if display > 1: - plt.show() - else: - plt.close() - - _, _, _, plot_dict = aoe_timecorr( - df, - energy_param, - current_param, - pdf=pdf, - plot_dict=plot_dict, - display=display, - ) - - _, _, _, _, plot_dict = AoEcorrection( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")[aoe_param], - eres_pars, - pdf=pdf, - sigma_func=sigma_func, - plot_dict=plot_dict, - display=display, - ) + except:pass + except:pass + vals, labels = plt.yticks() + plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.legend(loc="upper right") + plt.xlabel("cut value") + plt.ylabel("survival percentage") + plt.ylim([0, 105]) + plt.close() + return fig + +def plot_spectra(aoe_class, + data, + xrange=(900, 3000), + n_bins=2101, + xrange_inset = (1580, 1640), + n_bins_inset = 200, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig, ax = plt.subplots() + try: + bins = np.linspace(xrange[0], xrange[1], n_bins) + ax.hist( + data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="before PSD", + ) + ax.hist( + data.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="low side PSD cut", + ) + ax.hist( + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="double sided PSD cut", + ) + ax.hist( + data.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="rejected by PSD cut", + ) - fig3 = plt.figure() - plot_compt_bands_overlayed( - df.query("is_valid_cal& is_not_pulser")["AoE_classifier"], - df.query("is_valid_cal& 
is_not_pulser")[cal_energy_param], - [950, 1250, 1460, 1660, 1860, 2060, 2270], - [-5, 5], - ) - plt.ylabel("Counts") - plt.xlabel("Corrected A/E") - plt.title(f"Compton Bands after Correction") - plt.legend(loc="upper left") - plot_dict["compt_bands_corr"] = fig3 - if display > 1: - plt.show() - else: - plt.close() - - if dep_correct is True: - _, plot_dict = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_corrected"], - 1592, - (40, 20), - 0.9, - eres_pars, - dep_correct=True, - dep_mu=lambda x: dep_mu / pol1(1592.5, *mu_pars), - sig_func=lambda x: sigma_func(x, *sig_pars), - display=display, - plot_dict=plot_dict, - ) - else: - _, plot_dict = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_classifier"], - 1592, - (40, 20), - 0.9, - eres_pars, - display=display, - plot_dict=plot_dict, + axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) + bins = np.linspace(xrange_inset[0], xrange_inset[1], n_bins_inset) + select_df = data.query(f"{aoe_class.cal_energy_param}<{xrange_inset[1]}&{aoe_class.cal_energy_param}>{xrange_inset[0]}") + axins.hist( + select_df.query(aoe_class.selection_string)[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + except:pass + ax.set_xlim(xrange) + ax.set_yscale("log") + plt.xlabel("energy (keV)") + plt.ylabel("counts") + plt.legend(loc="upper left") + plt.close() + return fig + +def plot_sf_vs_energy(aoe_class, + data, + xrange = (900, 3000), + n_bins=701, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + bins = np.linspace(xrange[0], xrange[1], n_bins) + counts_pass, bins_pass, _ = pgh.get_hist( + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + bins=bins, + ) + counts, bins, _ = pgh.get_hist(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins) + survival_fracs = counts_pass / (counts + 10**-99) + + plt.step(pgh.get_bin_centers(bins_pass), 100*survival_fracs) + except:pass + plt.ylim([0, 100]) + vals, labels = plt.yticks() + plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.xlabel("energy (keV)") + plt.ylabel("survival percentage") + plt.close() + return fig + +def plot_classifier(aoe_class, + data, + aoe_param="AoE_Classifier", + xrange = (900, 3000), + yrange=(-50,10), + xn_bins=700, + yn_bins=500, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + plt.hist2d(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param] , + data.query(aoe_class.selection_string)[aoe_param], + bins=[np.linspace(xrange[0], xrange[1], xn_bins), + np.linspace(yrange[0], yrange[1], yn_bins)], + norm=LogNorm() ) + except:pass + plt.xlabel("energy (keV)") + plt.ylabel(aoe_param) + plt.xlim(xrange) + plt.ylim(yrange) + plt.close() + return fig + +def aoe_calibration(files, + lh5_path:str, + cal_dicts: dict, + current_param:str, + 
energy_param:str, + cal_energy_param: str, + eres_func: Callable, + pdf:Callable=standard_aoe, + cut_field:str = "is_valid_cal", + dt_corr: bool = False, + dep_correct: bool = False, + dt_cut: dict = None, + high_cut_val: int = 3, + mean_func:Callable=pol1, + sigma_func:Callable=sigma_fit, + dep_acc:float = 0.9, + dt_param:str = "dt_eff", + comptBands_width:int=20, + plot_options:dict={}, + threshold:int=800 + ): + params = [ + current_param, + "tp_0_est", + "tp_99", + dt_param, + energy_param, + cal_energy_param, + cut_field, + ] - fig4 = plt.figure() - plt.vlines(cut, 0, 100, label=f"Cut Value: {cut:1.2f}", color="black") - - for i, peak in enumerate(peaks_of_interest): - plt.errorbar( - full_cut_vals[i], - full_sfs[i], - yerr=full_sf_errs[i], - label=f"{get_peak_label(peak)} {peak} keV: {sf[i]:2.1f} +/- {sferr[i]:2.1f} %", + aoe = cal_aoe( + cal_dicts, + cal_energy_param, + eres_func, + pdf, + f"{cut_field}&is_not_pulser", + dt_corr, + dep_acc, + dep_correct, + dt_cut, + dt_param, + high_cut_val, + mean_func, + sigma_func, + comptBands_width, + plot_options ) + if dt_cut is not None: + params.append(dt_cut["out_param"]) - handles, labels = plt.gca().get_legend_handles_labels() - # order = [1, 2, 3, 0, 4, 5] - plt.legend( - # [handles[idx] for idx in order], - # [labels[idx] for idx in order], - loc="upper right", - ) - plt.xlabel("Cut Value") - plt.ylabel("Survival Fraction %") - plt.ylim([0, 105]) - plot_dict["surv_fracs"] = fig4 - if display > 1: - plt.show() - else: - plt.close() + data = load_data( + files, + lh5_path, + aoe.cal_dicts, + params, + cal_energy_param, + threshold + ) - fig5, ax = plt.subplots() - bins = np.arange(900, 3000, 1) - ax.hist( - df.query(f"is_valid_cal& is_not_pulser")[cal_energy_param], - bins=bins, - histtype="step", - label="Before PSD", - ) - ax.hist( - df.query(f"is_usable_fits & AoE_classifier > {cut}")[cal_energy_param], - bins=bins, - histtype="step", - label="Low side PSD cut", - ) - ax.hist( - df.query( - f"is_usable_fits & AoE_classifier > {cut} & AoE_classifier < {aoe_high_cut}" - )[cal_energy_param], - bins=bins, - histtype="step", - label="Double sided PSD cut", - ) - ax.hist( - df.query( - f"is_valid_cal& is_not_pulser & (AoE_classifier < {cut} | AoE_classifier > {aoe_high_cut} | (~is_usable_fits))" - )[cal_energy_param], - bins=bins, - histtype="step", - label="Rejected by PSD cut", - ) + data["AoE_Uncorr"] = np.divide(data[current_param], data[energy_param]) - axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) - bins = np.linspace(1580, 1640, 200) - axins.hist( - df.query(f"is_valid_cal& is_not_pulser")[cal_energy_param], - bins=bins, - histtype="step", - ) - axins.hist( - df.query(f"is_usable_fits & AoE_classifier > {cut}")[cal_energy_param], - bins=bins, - histtype="step", - ) - axins.hist( - df.query( - f"is_usable_fits & AoE_classifier > {cut} & AoE_classifier < {aoe_high_cut}" - )[cal_energy_param], - bins=bins, - histtype="step", - ) - axins.hist( - df.query( - f"is_valid_cal& is_not_pulser & (AoE_classifier < {cut} | AoE_classifier > {aoe_high_cut}| (~is_usable_fits))" - )[cal_energy_param], - bins=bins, - histtype="step", - ) - ax.set_xlim([900, 3000]) - ax.set_yscale("log") - plt.xlabel("Energy (keV)") - plt.ylabel("Counts") - plt.legend(loc="upper left") - plot_dict["PSD_spectrum"] = fig5 - if display > 1: - plt.show() - else: - plt.close() - - fig6 = plt.figure() - bins = np.arange(900, 3000, 3) - counts_pass, bins_pass, _ = pgh.get_hist( - df.query( - f"is_usable_fits & AoE_classifier > {cut} & AoE_classifier < {aoe_high_cut}" - 
)[cal_energy_param], - bins=bins, - ) - counts, bins, _ = pgh.get_hist( - df.query(f"is_valid_cal& is_not_pulser")[cal_energy_param], bins=bins - ) - survival_fracs = counts_pass / (counts + 10**-99) - - plt.step(pgh.get_bin_centers(bins_pass), survival_fracs) - plt.xlabel("Energy (keV)") - plt.ylabel("Survival Fraction") - plt.ylim([0, 1]) - plot_dict["psd_sf"] = fig6 - if display > 1: - plt.show() - else: - plt.close() + + aoe.update_cal_dicts({"AoE_Uncorr": + {"expression":f"{current_param}/{energy_param}", + "parameters":{} + }} + ) - return cal_dict, out_dict, plot_dict - except: - return cal_dict, out_dict, plot_dict + aoe.calibrate(data, "AoE_Uncorr") + log.info(f"Calibrated A/E") + return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe \ No newline at end of file From dfca8a05c0c86d77fa7860ac62ae99de3cc0d19b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:48:02 +0200 Subject: [PATCH 007/191] split loading routine into own file as well as function to handle failed fits --- src/pygama/pargen/utils.py | 127 +++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 src/pygama/pargen/utils.py diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py new file mode 100644 index 000000000..86d1d94ba --- /dev/null +++ b/src/pygama/pargen/utils.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd +import logging +from iminuit import Minuit, cost, util +from types import FunctionType + +import lgdo.lh5_store as lh5 +import pygama.pargen.cuts as cts + +log = logging.getLogger(__name__) + +def return_nans(input): + if isinstance(input, FunctionType): + args = input.__code__.co_varnames[: input.__code__.co_argcount][1:] + c = cost.UnbinnedNLL(np.array([0]), input) + m = Minuit(c, *[np.nan for arg in args]) + return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) + else: + args = input.pdf.__code__.co_varnames[: input.pdf.__code__.co_argcount][1:] + c = cost.UnbinnedNLL(np.array([0]), input.pdf) + m = Minuit(c, *[np.nan for arg in args]) + return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) + +def tag_pulser(files, lh5_path): + pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) + pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + e_cut = (pulser_df.trapTmax.values < entry[0] + entry[1]) & ( + pulser_df.trapTmax.values > entry[0] - entry[1] + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = ~(final_mask) + log.debug(f"pulser found: {pulser_props}") + else: + ids = np.ones(len(pulser_df), dtype=bool) + log.debug(f"no pulser found") + return ids + +def get_params(file_params, param_list): + out_params = [] + if isinstance(file_params, dict): + possible_keys = file_params.keys() + elif isinstance(file_params, list): + possible_keys = file_params + for param in param_list: + for key in possible_keys: + if key in param: + out_params.append(key) + return np.unique(out_params).tolist() + + +def load_data( + files: list, + lh5_path: str, + cal_dict: dict, + params = [ + "cuspEmax" + ], + cal_energy_param: str="cuspEmax_ctc_cal", + threshold = None +) -> tuple(np.array, np.array, np.array, np.array): + """ + Loads in the A/E parameters needed and applies calibration constants to energy + """ + + sto = lh5.LH5Store() + + if isinstance(files, dict): + df = [] + all_files = [] 
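# A minimal sketch (not part of the patch) of the calling pattern the dict
# branch below supports, assuming hypothetical run timestamps, file lists and
# calibration constants; "trapEmax_cal", its expression and parameter values
# are illustrative placeholders, not values taken from this series:
#
#     files = {
#         "20230101T000000Z": ["r1_f0.lh5", "r1_f1.lh5"],  # tstamp -> LH5 files
#         "20230102T000000Z": ["r2_f0.lh5"],
#     }
#     cal_dict = {  # per-timestamp expressions, applied via table.eval()
#         "20230101T000000Z": {
#             "trapEmax_cal": {
#                 "expression": "a*trapEmax + b",
#                 "parameters": {"a": 0.1315, "b": 0.0},
#             }
#         },
#         "20230102T000000Z": {
#             "trapEmax_cal": {
#                 "expression": "a*trapEmax + b",
#                 "parameters": {"a": 0.1312, "b": 0.0},
#             }
#         },
#     }
#     df = load_data(
#         files, "raw", cal_dict,
#         params=["trapEmax"], cal_energy_param="trapEmax_cal", threshold=800,
#     )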
+
+        masks=np.array([],dtype=bool)
+        for tstamp, tfiles in files.items():
+            table = sto.read_object(lh5_path, tfiles)[0]
+            if tstamp in cal_dict:
+                file_df = table.eval(cal_dict[tstamp]).get_dataframe()
+            else:
+                file_df = table.eval(cal_dict).get_dataframe()
+            file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object)
+            params.append("timestamp")
+            if threshold is not None:
+                mask = file_df[cal_energy_param] > threshold
+                file_df.drop(np.where(~mask)[0], inplace=True)
+            else:
+                mask = np.ones(len(file_df), dtype=bool)
+            masks = np.append(masks, mask)
+            df.append(file_df)
+            all_files += tfiles
+        df = pd.concat(df)
+
+    elif isinstance(files, list):
+        df = sto.read_object(lh5_path, files)[0].eval(cal_dict).get_dataframe()
+        if threshold is not None:
+            masks = df[cal_energy_param] > threshold
+            df.drop(np.where(~masks)[0], inplace=True)
+        else:
+            masks = np.ones(len(df),dtype=bool)
+        all_files = files
+
+    if lh5_path[-1] != "/": lh5_path+='/'
+    keys = lh5.ls(all_files[0], lh5_path)
+    keys = [key.split("/")[-1] for key in keys]
+    params = get_params(keys+list(df.keys()), params)
+
+    ids = tag_pulser(all_files, lh5_path)
+    df["is_not_pulser"] = ids[masks]
+    params.append("is_not_pulser")
+
+    for col in list(df.keys()):
+        if col not in params:
+            df.drop(col, inplace=True, axis=1)
+
+    param_dict = {}
+    for param in params:
+        if param not in df:
+            df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks]
+    log.debug(f"data loaded")
+    return df
\ No newline at end of file

From 4c55f013442e9537fe3d092e9e206ea819eace75 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sat, 30 Sep 2023 19:21:09 +0200
Subject: [PATCH 008/191] bugfix on selection when nan values from fit

---
 src/pygama/pargen/energy_optimisation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py
index 893a0a93f..3bcdb5e0a 100644
--- a/src/pygama/pargen/energy_optimisation.py
+++ b/src/pygama/pargen/energy_optimisation.py
@@ -1045,7 +1045,7 @@ def event_selection(
         n_bins=(np.nanmax(energy) - np.nanmin(energy)) // 1, uncal_is_int=True
     )
 
-    if params[0] is None:
+    if params[0] is None or np.isnan(params[0]).any():
         log.debug("Fit failed, using max guess")
         hist, bins, var = pgh.get_hist(
             energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1

From 4dc82721fc159e4a52f3ccf1c4dd81f06b651058 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sat, 30 Sep 2023 19:21:37 +0200
Subject: [PATCH 009/191] added ability to change tail weighting and changed
 binning on high stats fitting

---
 src/pygama/pargen/AoE_cal.py             | 1551 +++++++++++++---------
 src/pygama/pargen/ecal_th.py             |  653 +++++----
 src/pygama/pargen/energy_cal.py          |  209 +--
 src/pygama/pargen/energy_optimisation.py |   13 +-
 src/pygama/pargen/utils.py               |   41 +-
 5 files changed, 1508 insertions(+), 959 deletions(-)

diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py
index dc9f9ae3c..2125f80c3 100644
--- a/src/pygama/pargen/AoE_cal.py
+++ b/src/pygama/pargen/AoE_cal.py
@@ -35,6 +35,7 @@
 
 log = logging.getLogger(__name__)
 
+
 class PDF:
 
     """
@@ -144,9 +145,9 @@ def guess(hist, bins, var, **kwargs):
             "upper_range": np.nanmax(bins),
             "components": 0,
         }
-        for key, guess in guess_dict.items():
+        for key, guess in guess_dict.items():
             if np.isnan(guess):
-                guess_dict[key]=0
+                guess_dict[key] = 0
 
         return standard_aoe._replace_values(guess_dict, **kwargs)
 
@@ -164,7 +165,9 @@ def bounds(guess, **kwargs):
 
         return [
             bound
-            for field, bound in standard_aoe._replace_values(bounds_dict, **kwargs).items()
+            for field, bound in standard_aoe._replace_values(
+                bounds_dict, **kwargs
+            ).items()
         ]
 
     def fixed(**kwargs):
@@ -180,7 +183,10 @@ def fixed(**kwargs):
        }
 
        return [
-            fixed for field, fixed in standard_aoe._replace_values(fixed_dict, **kwargs).items()
+            fixed
+            for field, fixed in standard_aoe._replace_values(
+                fixed_dict, **kwargs
+            ).items()
        ]
 
     def width(pars, 
errs, cov): @@ -297,9 +303,9 @@ def guess(hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return standard_aoe_with_high_tail._replace_values(guess_dict, **kwargs) @@ -319,7 +325,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in standard_aoe_with_high_tail._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_with_high_tail._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -337,7 +345,10 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in standard_aoe_with_high_tail._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in standard_aoe_with_high_tail._replace_values( + fixed_dict, **kwargs + ).items() ] def width(pars, errs, cov): @@ -411,9 +422,9 @@ def guess(hist, bins, var, **kwargs): "lower_range": np.nanmin(bins), "upper_range": np.nanmax(bins), } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return standard_aoe_bkg._replace_values(guess_dict, **kwargs) @@ -429,7 +440,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in standard_aoe_bkg._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_bkg._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -443,7 +456,10 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in standard_aoe_bkg._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in standard_aoe_bkg._replace_values( + fixed_dict, **kwargs + ).items() ] @@ -465,9 +481,7 @@ def extended_pdf( """ Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ - return n_events, gaussian.pdf( - x, n_events, mu, sigma - ) + return n_events, gaussian.pdf(x, n_events, mu, sigma) def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 @@ -484,9 +498,9 @@ def guess(hist, bins, var, **kwargs): ) guess_dict = {"n_events": ls_guess, "mu": mu, "sigma": sigma} - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return gaussian._replace_values(guess_dict, **kwargs) @@ -506,7 +520,8 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in gaussian._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in gaussian._replace_values(fixed_dict, **kwargs).items() ] @@ -630,9 +645,9 @@ def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: "tau2": 0.1, "components": 0, } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return drift_time_distribution._replace_values(guess_dict, **kwargs) @@ -653,7 +668,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in drift_time_distribution._replace_values(bounds_dict, **kwargs).items() + for field, bound in drift_time_distribution._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -672,37 +689,39 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in drift_time_distribution._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in drift_time_distribution._replace_values( + fixed_dict, **kwargs + ).items() ] + class pol1: - def func(x, a, b): - return x*a + b - + 
return x * a + b + def string_func(input_param): return f"{input_param}*a+b" - + def guess(bands, means, mean_errs): return [-1e-06, 5e-01] - + + class sigma_fit: - def func(x, a, b, c): return np.sqrt(a + (b / (x + 10**-99)) ** c) - + def string_func(input_param): - return f"(a+(b/({input_param}+10**-99))**c)**(0.5)" - + return f"(a+(b/({input_param}+10**-99))**c)**(0.5)" + def guess(bands, sigmas, sigma_errs): return [np.nanpercentile(sigmas, 50) ** 2, 2, 2] + class sigmoid_fit: - def func(x, a, b, c, d): return (a + b * x) * nb_erfc(c * x + d) - - + def guess(xs, ys, y_errs): return [np.nanmax(ys) / 2, 0, 1, 1.5] @@ -766,7 +785,6 @@ def unbinned_aoe_fit( m2.simplex().migrad() m2.hesse() - x0 = pdf.guess( hist, bins, @@ -924,10 +942,10 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): fit_range[0], fit_range[1], 0, - ] - for i, guess in enumerate(parguess): + ] + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess elif func_i == pgf.extended_gauss_step_pdf: @@ -946,10 +964,19 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): if nsig_guess < 0: nsig_guess = 0 - parguess=[nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0] - for i, guess in enumerate(parguess): + parguess = [ + nsig_guess, + mu, + sigma, + nbkg_guess, + hstep, + fit_range[0], + fit_range[1], + 0, + ] + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess @@ -1105,7 +1132,7 @@ def get_survival_fraction( guess_pars_cut=None, guess_pars_surv=None, dt_mask=None, - mode= "greater", + mode="greater", display=0, ): if dt_mask is None: @@ -1121,7 +1148,7 @@ def get_survival_fraction( idxs = (cut_param < cut_val) & dt_mask else: raise ValueError("mode not recognised") - + if guess_pars_cut is None or guess_pars_surv is None: pars, errs = unbinned_energy_fit(energy, peak, eres_pars, simplex=True) guess_pars_cut = pars @@ -1165,10 +1192,10 @@ def get_sf_sweep( final_cut_value: float, peak: float, eres_pars: list, - dt_mask = None, - cut_range = (-5,5), - n_samples = 51, - mode= "greater" + dt_mask=None, + cut_range=(-5, 5), + n_samples=51, + mode="greater", ) -> tuple(pd.DataFrame, float, float): """ Calculates survival fraction for gamma lines using fitting method as in cut determination @@ -1184,10 +1211,9 @@ def get_sf_sweep( sf, err, cut_pars, surv_pars = get_survival_fraction( energy, cut_param, cut_val, peak, eres_pars, dt_mask=dt_mask, mode=mode ) - out_df = pd.concat([out_df, - pd.DataFrame([{"cut_val":cut_val, - "sf":sf, - "sf_err":err}])]) + out_df = pd.concat( + [out_df, pd.DataFrame([{"cut_val": cut_val, "sf": sf, "sf_err": err}])] + ) except: pass out_df.set_index("cut_val", inplace=True) @@ -1195,16 +1221,18 @@ def get_sf_sweep( energy, cut_param, final_cut_value, peak, eres_pars, dt_mask=dt_mask, mode=mode ) return ( - out_df.query(f'sf_err<5*{np.nanpercentile(out_df["sf_err"], 50)}& sf_err==sf_err & sf<=100'), + out_df.query( + f'sf_err<5*{np.nanpercentile(out_df["sf_err"], 50)}& sf_err==sf_err & sf<=100' + ), sf, sf_err, - ) + ) + -def compton_sf(cut_param, low_cut_val, high_cut_val = None, mode="greater", dt_mask=None): - +def compton_sf(cut_param, low_cut_val, high_cut_val=None, mode="greater", dt_mask=None): if dt_mask is None: dt_mask = np.full(len(cut_param), True, dtype=bool) - + if high_cut_val is not None: mask = (cut_param > low_cut_val) & (cut_param < high_cut_val) & dt_mask else: @@ -1214,12 +1242,16 @@ def compton_sf(cut_param, low_cut_val, 
high_cut_val = None, mode="greater", dt_m mask = (cut_param < low_cut_val) & dt_mask else: raise ValueError("mode not recognised") - + sf = 100 * len(cut_param[mask]) / len(cut_param) - sf_err = sf* np.sqrt( - (1 / len(cut_param)) + 1 / (len(cut_param[mask]) + 10**-99) - ) - return {"low_cut":low_cut_val, "sf":sf, "sf_err":sf_err, "high_cut": high_cut_val} + sf_err = sf * np.sqrt((1 / len(cut_param)) + 1 / (len(cut_param[mask]) + 10**-99)) + return { + "low_cut": low_cut_val, + "sf": sf, + "sf_err": sf_err, + "high_cut": high_cut_val, + } + def compton_sf_sweep( energy: np.array, @@ -1228,9 +1260,9 @@ def compton_sf_sweep( peak: float, eres: list[float, float], dt_mask: np.array = None, - cut_range = (-5,5), - n_samples = 51, - mode= "greater" + cut_range=(-5, 5), + n_samples=51, + mode="greater", ) -> tuple(float, np.array, list): """ Determines survival fraction for compton continuum by basic counting @@ -1238,44 +1270,53 @@ def compton_sf_sweep( cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) out_df = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) - + for cut_val in cut_vals: ct_dict = compton_sf(cut_param, cut_val, mode=mode, dt_mask=dt_mask) - df = pd.DataFrame([{"cut_val":ct_dict["low_cut"], "sf":ct_dict["sf"], "sf_err":ct_dict["sf_err"]}]) + df = pd.DataFrame( + [ + { + "cut_val": ct_dict["low_cut"], + "sf": ct_dict["sf"], + "sf_err": ct_dict["sf_err"], + } + ] + ) out_df = pd.concat([out_df, df]) out_df.set_index("cut_val", inplace=True) - + sf_dict = compton_sf(cut_param, final_cut_value, mode=mode, dt_mask=dt_mask) - + return out_df, sf_dict["sf"], sf_dict["sf_err"] + class cal_aoe: - - def __init__(self, - cal_dicts: dict, - cal_energy_param: str, - eres_func: callable, - pdf=standard_aoe, - selection_string: str = "is_valid_cal&is_not_pulser", - dt_corr: bool = False, - dep_acc:float = 0.9, - dep_correct: bool = False, - dt_cut:dict = None, - dt_param:str = "dt_eff", - high_cut_val: int = 3, - mean_func:Callable=pol1, - sigma_func:Callable=sigma_fit, - comptBands_width:int=20, - plot_options:dict={} - ): + def __init__( + self, + cal_dicts: dict, + cal_energy_param: str, + eres_func: callable, + pdf=standard_aoe, + selection_string: str = "is_valid_cal&is_not_pulser", + dt_corr: bool = False, + dep_acc: float = 0.9, + dep_correct: bool = False, + dt_cut: dict = None, + dt_param: str = "dt_eff", + high_cut_val: int = 3, + mean_func: Callable = pol1, + sigma_func: Callable = sigma_fit, + comptBands_width: int = 20, + plot_options: dict = {}, + ): self.cal_dicts = cal_dicts self.cal_energy_param = cal_energy_param self.eres_func = eres_func - self.pdf =pdf + self.pdf = pdf self.selection_string = selection_string self.dt_corr = dt_corr self.dt_param = "dt_eff" - self.dep_correct= dep_correct + self.dep_correct = dep_correct self.dt_cut = dt_cut self.dep_acc = dep_acc if self.dt_cut is not None: @@ -1288,10 +1329,10 @@ def __init__(self, self.dt_cut_hard = False self.fit_selection = self.selection_string self.high_cut_val = high_cut_val - self.mean_func= mean_func - self.sigma_func=sigma_func + self.mean_func = mean_func + self.sigma_func = sigma_func self.comptBands_width = comptBands_width - self.plot_options=plot_options + self.plot_options = plot_options def update_cal_dicts(self, update_dict): if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]): @@ -1303,15 +1344,11 @@ def update_cal_dicts(self, update_dict): else: self.cal_dicts.update(update_dict) - def aoe_timecorr( - self, - df, - aoe_param, - output_name = "AoE_Timecorr", - display=0 - ): + def 
aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): log.info("Starting A/E time correction") - self.timecorr_df = pd.DataFrame(columns=["timestamp", "mean", "mean_err", "res", "res_err"]) + self.timecorr_df = pd.DataFrame( + columns=["timestamp", "mean", "mean_err", "res", "res_err"] + ) try: if "timestamp" in df: tstamps = sorted(np.unique(df["timestamp"])) @@ -1330,98 +1367,132 @@ def aoe_timecorr( display=display, ) self.timecorr_df = pd.concat( - [ - self.timecorr_df, - pd.DataFrame([ - {"timestamp": tstamp, - "mean":pars["mu"], - "mean_err":errs["mu"], - "res":pars["sigma"] / pars["mu"], - "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} - ]), - ]) + [ + self.timecorr_df, + pd.DataFrame( + [ + { + "timestamp": tstamp, + "mean": pars["mu"], + "mean_err": errs["mu"], + "res": pars["sigma"] / pars["mu"], + "res_err": (pars["sigma"] / pars["mu"]) + * np.sqrt( + errs["sigma"] / pars["sigma"] + + errs["mu"] / pars["mu"] + ), + } + ] + ), + ] + ) except: self.timecorr_df = pd.concat( - [ - self.timecorr_df, - pd.DataFrame([ - {"timestamp": tstamp, - "mean":np.nan, - "mean_err":np.nan, - "res":np.nan, - "res_err":np.nan} - ]), - ]) + [ + self.timecorr_df, + pd.DataFrame( + [ + { + "timestamp": tstamp, + "mean": np.nan, + "mean_err": np.nan, + "res": np.nan, + "res_err": np.nan, + } + ] + ), + ] + ) self.timecorr_df.set_index("timestamp", inplace=True) - time_dict = fit_time_means(np.array(self.timecorr_df.index), - np.array(self.timecorr_df["mean"]), - np.array(self.timecorr_df["res"])) + time_dict = fit_time_means( + np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + np.array(self.timecorr_df["res"]), + ) df[output_name] = df[aoe_param] / np.array( [time_dict[tstamp] for tstamp in df["timestamp"]] ) - self.update_cal_dicts({ - tstamp: { - output_name: { - "expression": f"{aoe_param}/a", - "parameters": {"a": t_dict}, + self.update_cal_dicts( + { + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } } + for tstamp, t_dict in time_dict.items() } - for tstamp, t_dict in time_dict.items() - }) + ) log.info("A/E time correction finished") else: try: pars, errs, cov = unbinned_aoe_fit( - df.query(f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300")[ - aoe_param - ], + df.query( + f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300" + )[aoe_param], pdf=self.pdf, display=display, ) self.timecorr_df = pd.concat( [ self.timecorr_df, - pd.DataFrame([ - { - "mean":pars["mu"], - "mean_err":errs["mu"], - "res":pars["sigma"] / pars["mu"], - "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} - ]), - ]) + pd.DataFrame( + [ + { + "mean": pars["mu"], + "mean_err": errs["mu"], + "res": pars["sigma"] / pars["mu"], + "res_err": (pars["sigma"] / pars["mu"]) + * np.sqrt( + errs["sigma"] / pars["sigma"] + + errs["mu"] / pars["mu"] + ), + } + ] + ), + ] + ) except: self.timecorr_df = pd.concat( [ self.timecorr_df, - pd.DataFrame([ - { - "mean":np.nan, - "mean_err":np.nan, - "res":np.nan, - "res_err":np.nan} - ]), - ]) + pd.DataFrame( + [ + { + "mean": np.nan, + "mean_err": np.nan, + "res": np.nan, + "res_err": np.nan, + } + ] + ), + ] + ) df[output_name] = df[aoe_param] / pars["mu"] - self.update_cal_dicts({ - output_name: { - "expression": f"{aoe_param}/a", - "parameters": {"a": pars["mu"]}, + self.update_cal_dicts( + { + output_name: { + 
"expression": f"{aoe_param}/a", + "parameters": {"a": pars["mu"]}, + } } - }) + ) log.info("A/E time correction finished") except: log.error("A/E time correction failed") - self.update_cal_dicts({ - output_name: { - "expression": f"{aoe_param}/a", - "parameters": {"a": np.nan}, + self.update_cal_dicts( + { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": np.nan}, + } } - }) - + ) + def drift_time_correction( self, - data:pd.DataFrame, + data: pd.DataFrame, aoe_param, display: int = 0, ): @@ -1431,7 +1502,9 @@ def drift_time_correction( log.info("Starting A/E drift time correction") self.dt_res_dict = {} try: - dep_events = data.query(f"{self.fit_selection}&{self.cal_energy_param}>1582&{self.cal_energy_param}<1602&{self.cal_energy_param}=={self.cal_energy_param}&{aoe_param}=={aoe_param}") + dep_events = data.query( + f"{self.fit_selection}&{self.cal_energy_param}>1582&{self.cal_energy_param}<1602&{self.cal_energy_param}=={self.cal_energy_param}&{aoe_param}=={aoe_param}" + ) hist, bins, var = pgh.get_hist( dep_events[aoe_param], @@ -1441,64 +1514,88 @@ def drift_time_correction( mu = bin_cs[np.argmax(hist)] aoe_range = [mu * 0.9, mu * 1.1] - dt_range = [np.nanpercentile(dep_events[self.dt_param], 1) , np.nanpercentile(dep_events[self.dt_param], 99)] + dt_range = [ + np.nanpercentile(dep_events[self.dt_param], 1), + np.nanpercentile(dep_events[self.dt_param], 99), + ] - self.dt_res_dict['final_selection'] = f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}&{self.dt_param}>{dt_range[0]}&{self.dt_param}<{dt_range[1]}&{self.dt_param}=={self.dt_param}" + self.dt_res_dict[ + "final_selection" + ] = f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}&{self.dt_param}>{dt_range[0]}&{self.dt_param}<{dt_range[1]}&{self.dt_param}=={self.dt_param}" - final_df = dep_events.query(self.dt_res_dict['final_selection']) + final_df = dep_events.query(self.dt_res_dict["final_selection"]) hist, bins, var = pgh.get_hist( - final_df[self.dt_param], dx=10, range=(np.nanmin(final_df[self.dt_param]), np.nanmax(final_df[self.dt_param])) + final_df[self.dt_param], + dx=10, + range=( + np.nanmin(final_df[self.dt_param]), + np.nanmax(final_df[self.dt_param]), + ), + ) + + gpars = self.dt_res_dict["dt_guess"] = drift_time_distribution.guess( + hist, bins, var + ) + cost_func = cost.ExtendedUnbinnedNLL( + final_df[self.dt_param], drift_time_distribution.extended_pdf ) - - gpars = self.dt_res_dict['dt_guess'] =drift_time_distribution.guess(hist, bins, var) - cost_func = cost.ExtendedUnbinnedNLL(final_df[self.dt_param], drift_time_distribution.extended_pdf) m = Minuit(cost_func, **gpars) m.limits = drift_time_distribution.bounds(gpars) m.fixed = drift_time_distribution.fixed() m.simplex().migrad() m.hesse() - - self.dt_res_dict["dt_fit"]={"pars": m.values,"errs":m.errors, "object":m} - aoe_grp1 = self.dt_res_dict["aoe_grp1"] = f'{self.dt_param}>{m.values["mu1"] - 2 * m.values["sigma1"]} & {self.dt_param}<{m.values["mu1"] + 2 * m.values["sigma1"]}' - aoe_grp2 = self.dt_res_dict["aoe_grp2"] = f'{self.dt_param}>{m.values["mu2"] - 2 * m.values["sigma2"]} & {self.dt_param}<{m.values["mu2"] + 2 * m.values["sigma2"]}' - - aoe_pars, aoe_errs, _ = unbinned_aoe_fit(final_df.query(aoe_grp1)[aoe_param], - pdf=self.pdf, display=display) - self.dt_res_dict["aoe_fit1"] = {"pars":aoe_pars, "errs": aoe_errs} + self.dt_res_dict["dt_fit"] = { + "parameters": m.values, + "uncertainties": m.errors, + "object": m, + } + aoe_grp1 = self.dt_res_dict[ + "aoe_grp1" + ] = f'{self.dt_param}>{m.values["mu1"] - 2 * 
m.values["sigma1"]} & {self.dt_param}<{m.values["mu1"] + 2 * m.values["sigma1"]}' + aoe_grp2 = self.dt_res_dict[ + "aoe_grp2" + ] = f'{self.dt_param}>{m.values["mu2"] - 2 * m.values["sigma2"]} & {self.dt_param}<{m.values["mu2"] + 2 * m.values["sigma2"]}' + + aoe_pars, aoe_errs, _ = unbinned_aoe_fit( + final_df.query(aoe_grp1)[aoe_param], pdf=self.pdf, display=display + ) - aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit(final_df.query(aoe_grp2)[aoe_param], - pdf=self.pdf, display=display) + self.dt_res_dict["aoe_fit1"] = {"pars": aoe_pars, "errs": aoe_errs} - self.dt_res_dict["aoe_fit2"] = {"pars":aoe_pars2, "errs": aoe_errs2} + aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit( + final_df.query(aoe_grp2)[aoe_param], pdf=self.pdf, display=display + ) + + self.dt_res_dict["aoe_fit2"] = {"pars": aoe_pars2, "errs": aoe_errs2} try: self.alpha = (aoe_pars["mu"] - aoe_pars2["mu"]) / ( - (m.values["mu2"] * aoe_pars2["mu"]) - (m.values["mu1"] * aoe_pars["mu"]) + (m.values["mu2"] * aoe_pars2["mu"]) + - (m.values["mu1"] * aoe_pars["mu"]) ) except ZeroDivisionError: self.alpha = 0 - self.dt_res_dict["alpha"] = self.alpha + self.dt_res_dict["alpha"] = self.alpha log.info(f"dtcorr successful alpha:{self.alpha}") - data["AoE_DTcorr"] = data[aoe_param] * (1 + self.alpha * data[self.dt_param]) + data["AoE_DTcorr"] = data[aoe_param] * ( + 1 + self.alpha * data[self.dt_param] + ) except: log.error("Drift time correction failed") - self.alpha=np.nan + self.alpha = np.nan - self.update_cal_dicts({ - "AoE_DTcorr": { - "expression": f"{aoe_param}*(1+a*{self.dt_param})", - "parameters": {"a": self.alpha}, + self.update_cal_dicts( + { + "AoE_DTcorr": { + "expression": f"{aoe_param}*(1+a*{self.dt_param})", + "parameters": {"a": self.alpha}, + } } - }) + ) - def AoEcorrection( - self, - data:pd.DataFrame, - aoe_param:str, - display:int=0 - ): + def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): """ Calculates the corrections needed for the energy dependence of the A/E. Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. 
@@ -1508,20 +1605,32 @@ def AoEcorrection( self.energy_corr_res_dict = {} comptBands = np.arange(900, 2350, self.comptBands_width) - peaks = np.array([1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105]) + peaks = np.array( + [1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105] + ) allowed = np.array([], dtype=bool) for i, band in enumerate(comptBands): allow = True for peak in peaks: if (peak - 5) > band and (peak - 5) < (band + self.comptBands_width): allow = False - elif (peak + 5 > band) and (peak + 5) < (band +self.comptBands_width): + elif (peak + 5 > band) and (peak + 5) < (band + self.comptBands_width): allow = False allowed = np.append(allowed, allow) comptBands = comptBands[allowed] - self.energy_corr_fits = pd.DataFrame(columns=["compt_bands", "mean", "mean_err", - "sigma", "sigma_err", "ratio", "ratio_err"], dtype=float) + self.energy_corr_fits = pd.DataFrame( + columns=[ + "compt_bands", + "mean", + "mean_err", + "sigma", + "sigma_err", + "ratio", + "ratio_err", + ], + dtype=float, + ) try: select_df = data.query(f"{self.fit_selection} & {aoe_param}>0") @@ -1529,53 +1638,73 @@ def AoEcorrection( for band in comptBands: try: pars, errs, cov = unbinned_aoe_fit( - select_df.query(f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.comptBands_width+band}")[aoe_param], - pdf=self.pdf, display=display) + select_df.query( + f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.comptBands_width+band}" + )[aoe_param], + pdf=self.pdf, + display=display, + ) mean, mean_err = self.pdf.centroid(pars, errs, cov) sigma, sigma_err = self.pdf.width(pars, errs, cov) self.energy_corr_fits = pd.concat( - [ - self.energy_corr_fits, - pd.DataFrame([ - {"compt_bands": band+self.comptBands_width/2, - "mean":mean, - "mean_err":mean_err, - "sigma":sigma, - "sigma_err":sigma_err, - "ratio":pars["n_sig"] / pars["n_bkg"], - "ratio_err":(pars["n_sig"] / pars["n_bkg"]) *np.sqrt( - (errs["n_sig"] / pars["n_sig"]) ** 2 - + (errs["n_bkg"] / pars["n_bkg"]) ** 2 - ) - }] - ), - ]) + [ + self.energy_corr_fits, + pd.DataFrame( + [ + { + "compt_bands": band + self.comptBands_width / 2, + "mean": mean, + "mean_err": mean_err, + "sigma": sigma, + "sigma_err": sigma_err, + "ratio": pars["n_sig"] / pars["n_bkg"], + "ratio_err": (pars["n_sig"] / pars["n_bkg"]) + * np.sqrt( + (errs["n_sig"] / pars["n_sig"]) ** 2 + + (errs["n_bkg"] / pars["n_bkg"]) ** 2 + ), + } + ] + ), + ] + ) except: self.energy_corr_fits = pd.concat( - [ - self.energy_corr_fits, - pd.DataFrame( - [{"compt_bands": band, - "mean":np.nan, - "mean_err":np.nan, - "sigma":np.nan, - "sigma_err":np.nan, - "ratio":np.nan, - "ratio_err":np.nan - }] - ), - ]) + [ + self.energy_corr_fits, + pd.DataFrame( + [ + { + "compt_bands": band, + "mean": np.nan, + "mean_err": np.nan, + "sigma": np.nan, + "sigma_err": np.nan, + "ratio": np.nan, + "ratio_err": np.nan, + } + ] + ), + ] + ) self.energy_corr_fits.set_index("compt_bands", inplace=True) - valid_fits = self.energy_corr_fits.query("mean_err==mean_err&sigma_err==sigma_err & sigma_err!=0 & mean_err!=0") + valid_fits = self.energy_corr_fits.query( + "mean_err==mean_err&sigma_err==sigma_err & sigma_err!=0 & mean_err!=0" + ) self.energy_corr_res_dict["n_of_valid_fits"] = len(valid_fits) log.info(f"{len(valid_fits)} compton bands fit successfully") # Fit mus against energy - p0_mu = self.mean_func.guess(valid_fits.index, valid_fits["mean"], valid_fits["mean_err"]) + p0_mu = self.mean_func.guess( + valid_fits.index, valid_fits["mean"], valid_fits["mean_err"] + ) c_mu = 
cost.LeastSquares( - valid_fits.index, valid_fits["mean"], valid_fits["mean_err"], self.mean_func.func + valid_fits.index, + valid_fits["mean"], + valid_fits["mean_err"], + self.mean_func.func, ) c_mu.loss = "soft_l1" m_mu = Minuit(c_mu, *p0_mu) @@ -1587,16 +1716,28 @@ def AoEcorrection( mu_errs = m_mu.errors csqr_mu = np.sum( - ((valid_fits["mean"] - self.mean_func.func(valid_fits.index, *mu_pars)) ** 2) / valid_fits["mean_err"] + ( + ( + valid_fits["mean"] + - self.mean_func.func(valid_fits.index, *mu_pars) + ) + ** 2 + ) + / valid_fits["mean_err"] ) dof_mu = len(valid_fits["mean"]) - len(pars) p_val_mu = chi2.sf(csqr_mu, dof_mu) self.mean_fit_obj = m_mu # Fit sigma against energy - p0_sig = self.sigma_func.guess(valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"]) + p0_sig = self.sigma_func.guess( + valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"] + ) c_sig = cost.LeastSquares( - valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"], self.sigma_func.func + valid_fits.index, + valid_fits["sigma"], + valid_fits["sigma_err"], + self.sigma_func.func, ) c_sig.loss = "soft_l1" m_sig = Minuit(c_sig, *p0_sig) @@ -1606,14 +1747,20 @@ def AoEcorrection( sig_pars = m_sig.values sig_errs = m_sig.errors - + csqr_sig = np.sum( - ((valid_fits["sigma"] - self.sigma_func.func(valid_fits.index, *sig_pars)) ** 2) + ( + ( + valid_fits["sigma"] + - self.sigma_func.func(valid_fits.index, *sig_pars) + ) + ** 2 + ) / valid_fits["sigma_err"] ) dof_sig = len(valid_fits["sigma"]) - len(sig_pars) p_val_sig = chi2.sf(csqr_sig, dof_sig) - + self.sigma_fit_obj = m_sig # Get DEP fit @@ -1624,17 +1771,21 @@ def AoEcorrection( emax = peak + n_sigma * sigma try: dep_pars, dep_err, _ = unbinned_aoe_fit( - select_df.query(f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}")[aoe_param], + select_df.query( + f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}" + )[aoe_param], pdf=self.pdf, - display=display + display=display, ) except: dep_pars, dep_err, _ = return_nans(self.pdf) - data["AoE_Corrected"] = data[aoe_param] / self.mean_func.func(data[self.cal_energy_param], *mu_pars) + data["AoE_Corrected"] = data[aoe_param] / self.mean_func.func( + data[self.cal_energy_param], *mu_pars + ) data["AoE_Classifier"] = (data["AoE_Corrected"] - 1) / self.sigma_func.func( - data[self.cal_energy_param], *sig_pars - ) + data[self.cal_energy_param], *sig_pars + ) log.info("Finished A/E energy successful") log.info(f"mean pars are {mu_pars.to_dict()}") log.info(f"sigma pars are {sig_pars.to_dict()}") @@ -1646,46 +1797,54 @@ def AoEcorrection( sig_pars, sig_errs, sig_cov = return_nans(self.sigma_func.func) dep_pars, dep_err, dep_cov = return_nans(self.pdf) - self.energy_corr_res_dict["mean_fits"] = {"func": self.mean_func.__name__, - "module": self.mean_func.__module__, - "expression":self.mean_func.string_func("x"), - "pars": mu_pars.to_dict(), - "errs": mu_errs.to_dict(), - "p_val_mu": p_val_mu, - "csqr_mu": (csqr_mu, dof_mu)} - - self.energy_corr_res_dict["sigma_fits"] = {"func": self.sigma_func.__name__, - "module": self.sigma_func.__module__, - "expression":self.sigma_func.string_func("x"), - "pars": sig_pars.to_dict(), - "errs": sig_errs.to_dict(), - "p_val_mu": p_val_sig, - "csqr_mu": (csqr_sig, dof_sig)} - - self.energy_corr_res_dict["dep_fit"]={"func": self.pdf.__name__, - "module": self.pdf.__module__, - "pars": dep_pars.to_dict(), - "errs": dep_err.to_dict()} - - self.update_cal_dicts({ - "AoE_Corrected": { - "expression": 
f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", - "parameters": mu_pars.to_dict(), - }, - "AoE_Classifier": { + self.energy_corr_res_dict["mean_fits"] = { + "func": self.mean_func.__name__, + "module": self.mean_func.__module__, + "expression": self.mean_func.string_func("x"), + "parameters": mu_pars.to_dict(), + "uncertainties": mu_errs.to_dict(), + "p_val_mu": p_val_mu, + "csqr_mu": (csqr_mu, dof_mu), + } + + self.energy_corr_res_dict["sigma_fits"] = { + "func": self.sigma_func.__name__, + "module": self.sigma_func.__module__, + "expression": self.sigma_func.string_func("x"), + "parameters": sig_pars.to_dict(), + "uncertainties": sig_errs.to_dict(), + "p_val_mu": p_val_sig, + "csqr_mu": (csqr_sig, dof_sig), + } + + self.energy_corr_res_dict["dep_fit"] = { + "func": self.pdf.__name__, + "module": self.pdf.__module__, + "parameters": dep_pars.to_dict(), + "uncertainties": dep_err.to_dict(), + } + + self.update_cal_dicts( + { + "AoE_Corrected": { + "expression": f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", + "parameters": mu_pars.to_dict(), + }, + "AoE_Classifier": { "expression": f"AoE_Corrected/({self.sigma_func.string_func(self.cal_energy_param)})", "parameters": sig_pars.to_dict(), + }, } - }) + ) def get_aoe_cut_fit( self, - data:pd.DataFrame, - aoe_param:str, + data: pd.DataFrame, + aoe_param: str, peak: float, ranges: tuple, dep_acc: float, - display: int = 1 + display: int = 1, ): """ Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. @@ -1699,7 +1858,9 @@ def get_aoe_cut_fit( min_range, max_range = ranges try: - select_df = data.query(f"{self.fit_selection}&({self.cal_energy_param} > {peak - min_range}) & ({self.cal_energy_param} < {peak + max_range})") + select_df = data.query( + f"{self.fit_selection}&({self.cal_energy_param} > {peak - min_range}) & ({self.cal_energy_param} < {peak + max_range})" + ) # if dep_correct is True: # peak_aoe = (select_df[aoe_param] / dep_mu(select_df[self.cal_energy_param])) - 1 @@ -1716,70 +1877,102 @@ def get_aoe_cut_fit( peak, self.eres_func(peak), guess_pars_cut=None, - guess_pars_surv=None + guess_pars_surv=None, ) self.cut_fits = pd.concat( [ self.cut_fits, pd.DataFrame( - [{"cut_val": cut_val, - "sf":sf, - "sf_err":err, - }] + [ + { + "cut_val": cut_val, + "sf": sf, + "sf_err": err, + } + ] ), - ]) + ] + ) self.cut_fits.set_index("cut_val", inplace=True) - valid_fits = self.cut_fits.query(f'sf_err<{(1.5 * np.nanpercentile(self.cut_fits["sf_err"],85))}&sf_err==sf_err') + valid_fits = self.cut_fits.query( + f'sf_err<{(1.5 * np.nanpercentile(self.cut_fits["sf_err"],85))}&sf_err==sf_err' + ) c = cost.LeastSquares( - valid_fits.index, valid_fits["sf"], valid_fits["sf_err"], sigmoid_fit.func + valid_fits.index, + valid_fits["sf"], + valid_fits["sf_err"], + sigmoid_fit.func, ) c.loss = "soft_l1" - m1 = Minuit(c, *sigmoid_fit.guess(valid_fits.index, valid_fits["sf"], valid_fits["sf_err"])) + m1 = Minuit( + c, + *sigmoid_fit.guess( + valid_fits.index, valid_fits["sf"], valid_fits["sf_err"] + ), + ) m1.simplex().migrad() - xs = np.arange(np.nanmin(valid_fits.index), np.nanmax(valid_fits.index), 0.01) + xs = np.arange( + np.nanmin(valid_fits.index), np.nanmax(valid_fits.index), 0.01 + ) p = sigmoid_fit.func(xs, *m1.values) - self.cut_fit = {"function": sigmoid_fit.__name__ , "pars": m1.values.to_dict(), "errs": m1.errors.to_dict()} + self.cut_fit = { + "function": sigmoid_fit.__name__, + "parameters": m1.values.to_dict(), + "uncertainties": 
m1.errors.to_dict(),
+        }
 
         self.low_cut_val = round(xs[np.argmin(np.abs(p - (100 * self.dep_acc)))], 3)
         log.info(f"Cut found at {self.low_cut_val}")
 
-        data["AoE_Low_Cut"] = (data[aoe_param]>self.low_cut_val)
+        data["AoE_Low_Cut"] = data[aoe_param] > self.low_cut_val
         if self.dt_cut_param is not None:
-            data["AoE_Low_Cut"] = data["AoE_Low_Cut"] &(data[self.dt_cut_param])
-            data["AoE_Double_Sided_Cut"] = data["AoE_Low_Cut"] & (data[aoe_param]<self.high_cut_val)
-            self.update_cal_dicts({
-                "AoE_Low_Cut": {
-                    "expression": f"({aoe_param}>a) & ({self.dt_cut_param})",
-                    "parameters": {"a": self.low_cut_val},
-                }})
+            self.update_cal_dicts(
+                {
+                    "AoE_Low_Cut": {
+                        "expression": f"({aoe_param}>a) & ({self.dt_cut_param})",
+                        "parameters": {"a": self.low_cut_val},
+                    }
+                }
+            )
         else:
-            self.update_cal_dicts({
-                "AoE_Low_Cut": {
-                    "expression": f"({aoe_param}>a)",
-                    "parameters": {"a": self.low_cut_val},
-                }})
+            self.update_cal_dicts(
+                {
+                    "AoE_Low_Cut": {
+                        "expression": f"({aoe_param}>a)",
+                        "parameters": {"a": self.low_cut_val},
+                    }
+                }
+            )
-        self.update_cal_dicts({"AoE_Double_Sided_Cut": {
-                "expression": f"(a>{aoe_param}) & (AoE_Low_Cut)",
-                "parameters": {"a": self.high_cut_val},
-            }})
+        self.update_cal_dicts(
+            {
+                "AoE_Double_Sided_Cut": {
+                    "expression": f"(a>{aoe_param}) & (AoE_Low_Cut)",
+                    "parameters": {"a": self.high_cut_val},
+                }
+            }
+        )
 
     def get_results_dict(self):
         return {
             "cal_energy_param": self.cal_energy_param,
             "dt_param": self.dt_param,
-            "rt_correction": self.dt_corr,
-            "pdf":self.pdf.__name__,
+            "rt_correction": self.dt_corr,
+            "pdf": self.pdf.__name__,
             "1000-1300keV": self.timecorr_df.to_dict("index"),
             "correction_fit_results": self.energy_corr_res_dict,
             "low_cut": self.low_cut_val,
             "high_cut": self.high_cut_val,
             "low_side_sfs": self.low_side_sf.to_dict("index"),
             "2_side_sfs": self.two_side_sf.to_dict("index"),
-            }
+        }
 
     def fill_plot_dict(self, data, plot_dict={}):
         for key, item in self.plot_options.items():
@@ -1789,52 +1982,39 @@ def fill_plot_dict(self, data, plot_dict={}):
                 plot_dict[key] = item["function"](self, data)
         return plot_dict
 
-    def calibrate(self, df, initial_aoe_param):
-        self.aoe_timecorr(
-            df, initial_aoe_param
-        )
+    def 
calibrate(self, df, initial_aoe_param): self.low_cut_val, peak, fwhm, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] ) - self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) - self.low_side_peak_dfs[peak]=cut_df + self.low_side_peak_dfs[peak] = cut_df else: - emin,emax = fit_widths[i] - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + emin, emax = fit_widths[i] + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) cut_df, sf, sf_err = get_sf_sweep( peak_df[self.cal_energy_param].to_numpy(), peak_df[aoe_param].to_numpy(), self.low_cut_val, peak, fwhm, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] ) - self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) - self.low_side_peak_dfs[peak]=cut_df + self.low_side_peak_dfs[peak] = cut_df log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") except: - self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) - log.error(f"A/E Low side Survival fraction determination failed for {peak} peak") + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]), + ] + ) + log.error( + f"A/E Low side Survival fraction determination failed for {peak} peak" + ) self.low_side_sf.set_index("peak", inplace=True) - - self.two_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + self.two_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) log.info("Calculating 2 sided cut sfs") for i, peak in enumerate(peaks_of_interest): fwhm = self.eres_func(peak) @@ -1874,49 +2076,84 @@ def calibrate(self, df, initial_aoe_param): if peak == 2039: emin = 2 * fwhm emax = 2 * fwhm - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) - sf_dict = compton_sf(peak_df[aoe_param].to_numpy(), - self.low_cut_val, - self.high_cut_val, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) + sf_dict = compton_sf( + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) sf = sf_dict["sf"] sf_err = sf_dict["sf_err"] - self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, - "sf":sf, - "sf_err":sf_err}])]) + self.two_side_sf = pd.concat( + [ + self.two_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] + ) else: emin, emax = fit_widths[i] - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) sf, sf_err, _, _ 
= get_survival_fraction( - peak_df[self.cal_energy_param].to_numpy(), - peak_df[aoe_param].to_numpy(), - self.low_cut_val, - peak, - fwhm, - high_cut=self.high_cut_val, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) - self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + high_cut=self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) + self.two_side_sf = pd.concat( + [ + self.two_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] + ) log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") except: - self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) - log.error(f"A/E two side Survival fraction determination failed for {peak} peak") + self.two_side_sf = pd.concat( + [ + self.two_side_sf, + pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]), + ] + ) + log.error( + f"A/E two side Survival fraction determination failed for {peak} peak" + ) self.two_side_sf.set_index("peak", inplace=True) -def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + +def plot_aoe_mean_time( + aoe_class, data, time_param="AoE_Timecorr", figsize=[12, 8], fontsize=12 +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, ax = plt.subplots(1, 1) try: ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.timecorr_df.index + ], aoe_class.timecorr_df["mean"], yerr=aoe_class.timecorr_df["mean_err"], linestyle=" ", ) - grouped_means = [cal_dict[time_param]["parameters"]["a"] for tstamp, cal_dict in aoe_class.cal_dicts.items()] + grouped_means = [ + cal_dict[time_param]["parameters"]["a"] + for tstamp, cal_dict in aoe_class.cal_dicts.items() + ] ax.step( [ datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") @@ -1930,10 +2167,8 @@ def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12 datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.cal_dicts ], - y1=np.array(grouped_means) - - 0.2 * np.array(aoe_class.timecorr_df["res"]), - y2=np.array(grouped_means) - + 0.2 * np.array(aoe_class.timecorr_df["res"]), + y1=np.array(grouped_means) - 0.2 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + 0.2 * np.array(aoe_class.timecorr_df["res"]), color="green", alpha=0.2, ) @@ -1942,14 +2177,13 @@ def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12 datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.cal_dicts ], - y1=np.array(grouped_means) - - 0.4 * np.array(aoe_class.timecorr_df["res"]), - y2=np.array(grouped_means) - + 0.4 * np.array(aoe_class.timecorr_df["res"]), + y1=np.array(grouped_means) - 0.4 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + 0.4 * np.array(aoe_class.timecorr_df["res"]), color="yellow", alpha=0.2, ) - except:pass + except: + pass ax.set_xlabel("time") ax.set_ylabel("A/E mean") myFmt = mdates.DateFormatter("%b %d") @@ -1957,18 +2191,25 @@ def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12 plt.close() return fig -def plot_aoe_res_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], 
fontsize=12): + +def plot_aoe_res_time( + aoe_class, data, time_param="AoE_Timecorr", figsize=[12, 8], fontsize=12 +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, ax = plt.subplots(1, 1) try: ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.timecorr_df.index + ], aoe_class.timecorr_df["res"], yerr=aoe_class.timecorr_df["res_err"], linestyle=" ", ) - except:pass + except: + pass ax.set_xlabel("time") ax.set_ylabel("A/E res") myFmt = mdates.DateFormatter("%b %d") @@ -1976,26 +2217,34 @@ def plot_aoe_res_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, plt.close() return fig -def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_corr="AoE_DTcorr", - figsize=[12, 8], fontsize=12): - + +def drifttime_corr_plot( + aoe_class, + data, + aoe_param="AoE_Timecorr", + aoe_param_corr="AoE_DTcorr", + figsize=[12, 8], + fontsize=12, +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig = plt.figure() try: - - dep_events = data.query(f"{aoe_class.fit_selection}&{aoe_class.cal_energy_param}>1582&{aoe_class.cal_energy_param}<1602&{aoe_class.cal_energy_param}=={aoe_class.cal_energy_param}&{aoe_param}=={aoe_param}") - final_df = dep_events.query(aoe_class.dt_res_dict['final_selection']) - - + dep_events = data.query( + f"{aoe_class.fit_selection}&{aoe_class.cal_energy_param}>1582&{aoe_class.cal_energy_param}<1602&{aoe_class.cal_energy_param}=={aoe_class.cal_energy_param}&{aoe_param}=={aoe_param}" + ) + final_df = dep_events.query(aoe_class.dt_res_dict["final_selection"]) + plt.subplot(2, 2, 1) - aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["pars"] - + aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["parameters"] + xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 100) counts, aoe_bins, bars = plt.hist( - final_df.query(f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}')[aoe_param], + final_df.query( + f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}' + )[aoe_param], bins=400, histtype="step", label="data", @@ -2008,12 +2257,14 @@ def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_c plt.legend(loc="upper left") plt.xlabel("A/E") plt.ylabel("counts") - - aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["pars"] + + aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["parameters"] plt.subplot(2, 2, 2) xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 100) counts, aoe_bins2, bars = plt.hist( - final_df.query(f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}')[aoe_param], + final_df.query( + f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}' + )[aoe_param], bins=400, histtype="step", label="Data", @@ -2026,24 +2277,33 @@ def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_c plt.legend(loc="upper left") plt.xlabel("A/E") plt.ylabel("counts") - + hist, bins, var = pgh.get_hist( - final_df[aoe_class.dt_param], dx=10, range=(np.nanmin(final_df[aoe_class.dt_param]), - np.nanmax(final_df[aoe_class.dt_param])) + final_df[aoe_class.dt_param], + dx=10, + range=( + np.nanmin(final_df[aoe_class.dt_param]), + np.nanmax(final_df[aoe_class.dt_param]), + ), ) 
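+        # Panels (1) and (2) above show the A/E fits for the two drift-time
+        # populations; panel (3) below overlays the drift-time distribution
+        # fit, and panel (4) compares A/E before and after the correction.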
- + plt.subplot(2, 2, 3) plt.step(pgh.get_bin_centers(bins), hist, label="data") plt.plot( pgh.get_bin_centers(bins), - drift_time_distribution.pdf(pgh.get_bin_centers(bins), - **aoe_class.dt_res_dict['dt_guess']) * np.diff(bins)[0], + drift_time_distribution.pdf( + pgh.get_bin_centers(bins), **aoe_class.dt_res_dict["dt_guess"] + ) + * np.diff(bins)[0], label="Guess", ) plt.plot( pgh.get_bin_centers(bins), - drift_time_distribution.pdf(pgh.get_bin_centers(bins), - *aoe_class.dt_res_dict["dt_fit"]["pars"]) * np.diff(bins)[0], + drift_time_distribution.pdf( + pgh.get_bin_centers(bins), + *aoe_class.dt_res_dict["dt_fit"]["parameters"], + ) + * np.diff(bins)[0], label="fit", ) plt.xlabel("drift time (ns)") @@ -2057,41 +2317,49 @@ def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_c 200, ) plt.hist(final_df[aoe_param], bins=bins, histtype="step", label="uncorrected") - plt.hist(final_df[aoe_param_corr], bins=bins, histtype="step", label="corrected") + plt.hist( + final_df[aoe_param_corr], bins=bins, histtype="step", label="corrected" + ) plt.xlabel("A/E") plt.ylabel("counts") plt.legend(loc="upper left") plt.tight_layout() - plt.xlim( - bins[0], bins[-1] - ) - except:pass + plt.xlim(bins[0], bins[-1]) + except: + pass plt.close() return fig -def plot_compt_bands_overlayed(aoe_class, - data, - eranges: list[tuple], - aoe_param = "AoE_Timecorr", - aoe_range: list[float] = None, - title= "Compton Bands", - density=True, - n_bins=50, - figsize=[12, 8], fontsize=12 - ) -> None: + +def plot_compt_bands_overlayed( + aoe_class, + data, + eranges: list[tuple], + aoe_param="AoE_Timecorr", + aoe_range: list[float] = None, + title="Compton Bands", + density=True, + n_bins=50, + figsize=[12, 8], + fontsize=12, +) -> None: """ Function to plot various compton bands to check energy dependence and corrections """ plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - + fig = plt.figure() - + for erange in eranges: try: - select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_param}=={aoe_param}') + select_df = data.query( + f"{aoe_class.selection_string}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_param}=={aoe_param}" + ) if aoe_range is not None: - select_df = select_df.query(f'{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}') + select_df = select_df.query( + f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}" + ) bins = np.linspace(aoe_range[0], aoe_range[1], n_bins) else: bins = np.linspace(0.85, 1.05, n_bins) @@ -2102,7 +2370,8 @@ def plot_compt_bands_overlayed(aoe_class, label=f"{erange[0]}-{erange[1]}", density=density, ) - except:pass + except: + pass plt.ylabel("counts") plt.xlabel(aoe_param) plt.title(title) @@ -2110,54 +2379,60 @@ def plot_compt_bands_overlayed(aoe_class, plt.close() return fig -def plot_dt_dep(aoe_class, - data, - eranges: list[tuple], - titles:list=None, - aoe_param = "AoE_Timecorr", - bins=[200, 100], - dt_max = 2000, - figsize=[12, 8], fontsize=12 - ) -> None: + +def plot_dt_dep( + aoe_class, + data, + eranges: list[tuple], + titles: list = None, + aoe_param="AoE_Timecorr", + bins=[200, 100], + dt_max=2000, + figsize=[12, 8], + fontsize=12, +) -> None: """ Function to produce 2d histograms of A/E against drift time to check dependencies """ plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - + fig = plt.figure() - for i,erange in enumerate(eranges): + for i, 
erange in enumerate(eranges): try: - plt.subplot(3, 2, i+1) - select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_param}=={aoe_param}') - - hist, bs, var = pgh.get_hist( - select_df[aoe_param], bins=500 + plt.subplot(3, 2, i + 1) + select_df = data.query( + f"{aoe_class.selection_string}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_param}=={aoe_param}" ) + + hist, bs, var = pgh.get_hist(select_df[aoe_param], bins=500) bin_cs = (bs[1:] + bs[:-1]) / 2 mu = bin_cs[np.argmax(hist)] aoe_range = [mu * 0.9, mu * 1.1] - - final_df = select_df.query(f'{aoe_param}<{aoe_range[1]}&{aoe_param}>{aoe_range[0]}&{aoe_class.dt_param}<{dt_max}') - plt.hist2d(final_df[aoe_param], final_df[aoe_class.dt_param], - bins=bins, norm=LogNorm()) + final_df = select_df.query( + f"{aoe_param}<{aoe_range[1]}&{aoe_param}>{aoe_range[0]}&{aoe_class.dt_param}<{dt_max}" + ) + plt.hist2d( + final_df[aoe_param], + final_df[aoe_class.dt_param], + bins=bins, + norm=LogNorm(), + ) plt.ylabel("drift time (ns)") plt.xlabel("A/E") if titles is None: - plt.title(f'{erange[0]}-{erange[1]}') + plt.title(f"{erange[0]}-{erange[1]}") else: plt.title(titles[i]) - except:pass + except: + pass plt.tight_layout() plt.close() return fig -def plot_mean_fit(aoe_class, - data, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: +def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) @@ -2166,18 +2441,23 @@ def plot_mean_fit(aoe_class, aoe_class.energy_corr_fits.index, aoe_class.energy_corr_fits["mean"], yerr=aoe_class.energy_corr_fits["mean_err"], - xerr=aoe_class.comptBands_width/2, + xerr=aoe_class.comptBands_width / 2, label="data", linestyle=" ", ) - ax1.plot(aoe_class.energy_corr_fits.index, - aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), label="linear model") + ax1.plot( + aoe_class.energy_corr_fits.index, + aoe_class.mean_func.func( + aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + ), + label="linear model", + ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['mu'], + aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainties"]["mu"], label="DEP", color="green", linestyle=" ", @@ -2188,42 +2468,60 @@ def plot_mean_fit(aoe_class, ax1.set_ylabel("raw A/E (a.u.)", ha="right", y=1) ax2.scatter( aoe_class.energy_corr_fits.index, - 100 * (aoe_class.energy_corr_fits["mean"] - aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + 100 + * ( + aoe_class.energy_corr_fits["mean"] + - aoe_class.mean_func.func( + aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + ) + ) + / aoe_class.mean_func.func( + aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + ), lw=1, c="b", ) ax2.scatter( 1592, - 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'] - aoe_class.mean_func.func(1592, 
**aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]),
+        100
+        * (
+            aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"]
+            - aoe_class.mean_func.func(
+                1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"]
+            )
+        )
+        / aoe_class.mean_func.func(
+            1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"]
+        ),
         lw=1,
         c="g",
     )
-    except:pass
+    except:
+        pass
     ax2.set_ylabel("residuals %", ha="right", y=1)
     ax2.set_xlabel("energy (keV)", ha="right", x=1)
     plt.tight_layout()
     plt.close()
    return fig
 
-def plot_sigma_fit(aoe_class, 
-                data, 
-                figsize=[12, 8], fontsize=12 
-                ) -> plt.figure: 
+
+def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure:
     plt.rcParams["figure.figsize"] = figsize
-    plt.rcParams["font.size"] = fontsize 
-    
+    plt.rcParams["font.size"] = fontsize
+
     fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
     try:
         ax1.errorbar(
             aoe_class.energy_corr_fits.index,
             aoe_class.energy_corr_fits["sigma"],
             yerr=aoe_class.energy_corr_fits["sigma_err"],
-            xerr=aoe_class.comptBands_width/2,
+            xerr=aoe_class.comptBands_width / 2,
             label="data",
             linestyle=" ",
         )
-        sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["pars"]
+        sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["parameters"]
         if aoe_class.sigma_func == sigma_fit:
             label = f'sqrt model: \nsqrt({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.1f}/E)^{sig_pars["c"]:1.1f})'
         elif aoe_class.sigma_func == sigma_fit_quadratic:
@@ -2232,13 +2530,13 @@ def plot_sigma_fit(aoe_class,
             raise ValueError("unknown sigma function")
         ax1.plot(
             aoe_class.energy_corr_fits.index,
-            aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index,**sig_pars),
+            aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars),
             label=label,
         )
         ax1.errorbar(
             1592,
-            aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'],
-            yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['sigma'],
+            aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"],
+            yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainties"]["sigma"],
             label="DEP",
             color="green",
             linestyle=" ",
         )
         ax1.legend(title="A/E stdev energy dependence", frameon=False)
         ax2.scatter(
             aoe_class.energy_corr_fits.index,
-            100 * (aoe_class.energy_corr_fits["sigma"] - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars)) / aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars),
+            100
+            * (
+                aoe_class.energy_corr_fits["sigma"]
+                - aoe_class.sigma_func.func(
+                    aoe_class.energy_corr_fits.index, **sig_pars
+                )
+            )
+            / aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars),
             lw=1,
             c="b",
         )
         ax2.scatter(
             1592,
-            100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'] - aoe_class.sigma_func.func(1592, **sig_pars)) / aoe_class.sigma_func.func(1592, **sig_pars),
+            100
+            * (
+                aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"]
+                - aoe_class.sigma_func.func(1592, **sig_pars)
+            )
+            / aoe_class.sigma_func.func(1592, **sig_pars),
             lw=1,
             c="g",
         )
-    except:pass
+    except:
+        pass
     ax2.set_ylabel("residuals", ha="right", y=1)
     ax2.set_xlabel("energy (keV)", ha="right", x=1)
     plt.tight_layout()
     plt.close()
     return fig
 
-def plot_cut_fit(aoe_class, 
-                data, 
-                figsize=[12, 8], fontsize=12 
-                ) -> plt.figure: 
+
+def plot_cut_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure:
     plt.rcParams["figure.figsize"] = figsize
-    plt.rcParams["font.size"] = fontsize
+    
plt.rcParams["font.size"] = fontsize fig = plt.figure() try: plt.errorbar( @@ -2279,9 +2588,19 @@ def plot_cut_fit(aoe_class, linestyle=" ", ) - plt.plot(aoe_class.cut_fits.index, sigmoid_fit.func(aoe_class.cut_fits.index.to_numpy(), - **aoe_class.cut_fit["pars"])) - plt.hlines((100 * aoe_class.dep_acc), -8.1, aoe_class.low_cut_val, color="red", linestyle="--") + plt.plot( + aoe_class.cut_fits.index, + sigmoid_fit.func( + aoe_class.cut_fits.index.to_numpy(), **aoe_class.cut_fit["parameters"] + ), + ) + plt.hlines( + (100 * aoe_class.dep_acc), + -8.1, + aoe_class.low_cut_val, + color="red", + linestyle="--", + ) plt.vlines( aoe_class.low_cut_val, np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, @@ -2291,26 +2610,31 @@ def plot_cut_fit(aoe_class, ) plt.xlim([-8.1, 0.1]) vals, labels = plt.yticks() - plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) plt.ylim([np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, 102]) - except:pass + except: + pass plt.xlabel("cut value") plt.ylabel("survival percentage") plt.close() return fig -def plot_survival_fraction_curves(aoe_class, - data, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_survival_fraction_curves( + aoe_class, data, figsize=[12, 8], fontsize=12 +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: - plt.vlines(aoe_class.low_cut_val, 0, 100, label=f"cut value: {aoe_class.low_cut_val:1.2f}", color="black") - + plt.vlines( + aoe_class.low_cut_val, + 0, + 100, + label=f"cut value: {aoe_class.low_cut_val:1.2f}", + color="black", + ) for peak, survival_df in aoe_class.low_side_peak_dfs.items(): try: @@ -2318,12 +2642,14 @@ def plot_survival_fraction_curves(aoe_class, survival_df.index, survival_df["sf"], yerr=survival_df["sf_err"], - label=f'{get_peak_label(peak)} {peak} keV: {aoe_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {aoe_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %' + label=f'{get_peak_label(peak)} {peak} keV: {aoe_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {aoe_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %', ) - except:pass - except:pass + except: + pass + except: + pass vals, labels = plt.yticks() - plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) plt.legend(loc="upper right") plt.xlabel("cut value") plt.ylabel("survival percentage") @@ -2331,18 +2657,20 @@ def plot_survival_fraction_curves(aoe_class, plt.close() return fig -def plot_spectra(aoe_class, - data, - xrange=(900, 3000), - n_bins=2101, - xrange_inset = (1580, 1640), - n_bins_inset = 200, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_spectra( + aoe_class, + data, + xrange=(900, 3000), + n_bins=2101, + xrange_inset=(1580, 1640), + n_bins_inset=200, + figsize=[12, 8], + fontsize=12, +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots() try: bins = np.linspace(xrange[0], xrange[1], n_bins) @@ -2353,19 +2681,25 @@ def plot_spectra(aoe_class, label="before PSD", ) ax.hist( - data.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", label="low side PSD cut", ) ax.hist( - data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + 
data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", label="double sided PSD cut", ) ax.hist( - data.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", label="rejected by PSD cut", @@ -2373,28 +2707,37 @@ def plot_spectra(aoe_class, axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) bins = np.linspace(xrange_inset[0], xrange_inset[1], n_bins_inset) - select_df = data.query(f"{aoe_class.cal_energy_param}<{xrange_inset[1]}&{aoe_class.cal_energy_param}>{xrange_inset[0]}") + select_df = data.query( + f"{aoe_class.cal_energy_param}<{xrange_inset[1]}&{aoe_class.cal_energy_param}>{xrange_inset[0]}" + ) axins.hist( select_df.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins, histtype="step", ) axins.hist( - select_df.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + select_df.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", ) axins.hist( - select_df.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + select_df.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", ) axins.hist( - select_df.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + select_df.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", ) - except:pass + except: + pass ax.set_xlim(xrange) ax.set_yscale("log") plt.xlabel("energy (keV)") @@ -2403,58 +2746,67 @@ def plot_spectra(aoe_class, plt.close() return fig -def plot_sf_vs_energy(aoe_class, - data, - xrange = (900, 3000), - n_bins=701, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_sf_vs_energy( + aoe_class, data, xrange=(900, 3000), n_bins=701, figsize=[12, 8], fontsize=12 +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: bins = np.linspace(xrange[0], xrange[1], n_bins) counts_pass, bins_pass, _ = pgh.get_hist( - data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[ + aoe_class.cal_energy_param + ], + bins=bins, + ) + counts, bins, _ = pgh.get_hist( + data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins, ) - counts, bins, _ = pgh.get_hist(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins) survival_fracs = counts_pass / (counts + 10**-99) - plt.step(pgh.get_bin_centers(bins_pass), 100*survival_fracs) - except:pass + plt.step(pgh.get_bin_centers(bins_pass), 100 * survival_fracs) + except: + pass plt.ylim([0, 100]) vals, labels = plt.yticks() - plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) plt.xlabel("energy (keV)") plt.ylabel("survival percentage") plt.close() return fig -def plot_classifier(aoe_class, - data, - aoe_param="AoE_Classifier", - xrange = (900, 3000), - yrange=(-50,10), - xn_bins=700, - yn_bins=500, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_classifier( + aoe_class, + data, + aoe_param="AoE_Classifier", + xrange=(900, 3000), + 
yrange=(-50, 10), + xn_bins=700, + yn_bins=500, + figsize=[12, 8], + fontsize=12, +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: - plt.hist2d(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param] , - data.query(aoe_class.selection_string)[aoe_param], - bins=[np.linspace(xrange[0], xrange[1], xn_bins), - np.linspace(yrange[0], yrange[1], yn_bins)], - norm=LogNorm() - ) - except:pass + plt.hist2d( + data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], + data.query(aoe_class.selection_string)[aoe_param], + bins=[ + np.linspace(xrange[0], xrange[1], xn_bins), + np.linspace(yrange[0], yrange[1], yn_bins), + ], + norm=LogNorm(), + ) + except: + pass plt.xlabel("energy (keV)") plt.ylabel(aoe_param) plt.xlim(xrange) @@ -2462,27 +2814,29 @@ def plot_classifier(aoe_class, plt.close() return fig -def aoe_calibration(files, - lh5_path:str, - cal_dicts: dict, - current_param:str, - energy_param:str, - cal_energy_param: str, - eres_func: Callable, - pdf:Callable=standard_aoe, - cut_field:str = "is_valid_cal", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict = None, - high_cut_val: int = 3, - mean_func:Callable=pol1, - sigma_func:Callable=sigma_fit, - dep_acc:float = 0.9, - dt_param:str = "dt_eff", - comptBands_width:int=20, - plot_options:dict={}, - threshold:int=800 - ): + +def aoe_calibration( + files, + lh5_path: str, + cal_dicts: dict, + current_param: str, + energy_param: str, + cal_energy_param: str, + eres_func: Callable, + pdf: Callable = standard_aoe, + cut_field: str = "is_valid_cal", + dt_corr: bool = False, + dep_correct: bool = False, + dt_cut: dict = None, + high_cut_val: int = 3, + mean_func: Callable = pol1, + sigma_func: Callable = sigma_fit, + dep_acc: float = 0.9, + dt_param: str = "dt_eff", + comptBands_width: int = 20, + plot_options: dict = {}, + threshold: int = 800, +): params = [ current_param, "tp_0_est", @@ -2491,46 +2845,43 @@ def aoe_calibration(files, energy_param, cal_energy_param, cut_field, - ] + ] - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - f"{cut_field}&is_not_pulser", - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options - ) + aoe = cal_aoe( + cal_dicts, + cal_energy_param, + eres_func, + pdf, + f"{cut_field}&is_not_pulser", + dt_corr, + dep_acc, + dep_correct, + dt_cut, + dt_param, + high_cut_val, + mean_func, + sigma_func, + comptBands_width, + plot_options, + ) if dt_cut is not None: params.append(dt_cut["out_param"]) data = load_data( - files, - lh5_path, - aoe.cal_dicts, - params, - cal_energy_param, - threshold - ) + files, lh5_path, aoe.cal_dicts, params, cal_energy_param, threshold + ) data["AoE_Uncorr"] = np.divide(data[current_param], data[energy_param]) - - aoe.update_cal_dicts({"AoE_Uncorr": - {"expression":f"{current_param}/{energy_param}", - "parameters":{} - }} - ) + aoe.update_cal_dicts( + { + "AoE_Uncorr": { + "expression": f"{current_param}/{energy_param}", + "parameters": {}, + } + } + ) aoe.calibrate(data, "AoE_Uncorr") log.info(f"Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe \ No newline at end of file + return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 578d44867..acc8ed77c 100644 --- a/src/pygama/pargen/ecal_th.py +++ 
b/src/pygama/pargen/ecal_th.py @@ -89,32 +89,34 @@ def gen_pars_dict(pars, deg, energy_param): return out_dict + class fwhm_linear: - def func(x,a,b): + def func(x, a, b): return np.sqrt(a + b * x) - + def string_func(input_param): return f"(a+b*{input_param})**(0.5)" - + def guess(xs, ys, y_errs): - return [np.nanmin(ys), 10**-3] - + return [np.nanmin(ys), 10**-3] + def bounds(): - return [(0,None),(0,None)] - + return [(0, None), (0, None)] + + class fwhm_quadratic: - def func(x, a, b, c): - return np.sqrt(a + b * x + c*x**2) - + return np.sqrt(a + b * x + c * x**2) + def string_func(input_param): return f"(a+b*{input_param}+c*{input_param}**2)**(0.5)" - + def guess(xs, ys, y_errs): - return [np.nanmin(ys), 10**-3, 10**-5] - + return [np.nanmin(ys), 10**-3, 10**-5] + def bounds(): - return [(0,None),(0,None),(0,None)] + return [(0, None), (0, None), (0, None)] + class calibrate_parameter: glines = [ @@ -132,8 +134,8 @@ class calibrate_parameter: (20, 20), (30, 30), (30, 30), - (40, 25), - (25, 40), + (40, 20), + (20, 40), (40, 40), (60, 60), ] # side bands width @@ -161,7 +163,7 @@ class calibrate_parameter: def __init__( self, energy_param, - selection_string = "is_usable", + selection_string="is_usable", plot_options: dict = None, guess_keV: float | None = None, threshold: int = 0, @@ -169,7 +171,7 @@ def __init__( n_events: int = None, simplex: bool = True, deg: int = 1, - cal_energy_param:str = None + cal_energy_param: str = None, ): self.energy_param = energy_param if cal_energy_param is None: @@ -228,7 +230,7 @@ def fit_energy_res(self): try: if 2614.50 not in fwhm_peaks: raise RuntimeError - + c_lin = cost.LeastSquares( fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_linear.func ) @@ -240,57 +242,61 @@ def fit_energy_res(self): m_lin.hesse() rng = np.random.default_rng(1) - pars_b = rng.multivariate_normal(m_lin.values, - m_lin.covariance, size=1000) + pars_b = rng.multivariate_normal(m_lin.values, m_lin.covariance, size=1000) fits = np.array([fwhm_linear.func(fwhm_peaks, *par_b) for par_b in pars_b]) qbb_vals = np.array([fwhm_linear.func(2039.0, *par_b) for par_b in pars_b]) qbb_err = np.nanstd(qbb_vals) predicted_fwhms = fwhm_linear.func(fwhm_peaks, *m_lin.values) fit_qbb = fwhm_linear.func(2039.0, *m_lin.values) - - p_val = scipy.stats.chi2.sf(m_lin.fval, len(fwhm_peaks)-len(m_lin.values)) - - self.fwhm_fit_linear = {"function":fwhm_linear.__name__, - "module":fwhm_linear.__module__, - "expression":fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)":qbb_err, - "pars":m_lin.values, - "errors":m_lin.errors, - "cov":m_lin.covariance, - "csqr": (m_lin.fval, len(fwhm_peaks)-len(m_lin.values)), - "p_val":p_val} - - + + p_val = scipy.stats.chi2.sf(m_lin.fval, len(fwhm_peaks) - len(m_lin.values)) + + self.fwhm_fit_linear = { + "function": fwhm_linear.__name__, + "module": fwhm_linear.__module__, + "expression": fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)": qbb_err, + "parameters": m_lin.values, + "uncertainties": m_lin.errors, + "cov": m_lin.covariance, + "csqr": (m_lin.fval, len(fwhm_peaks) - len(m_lin.values)), + "p_val": p_val, + } + log.info(f'FWHM linear fit: {self.fwhm_fit_linear["pars"].to_dict()}') log.info(f"FWHM fit values:") log.info(f"\t Energy | FWHM (keV) | Predicted (keV)") for i, (peak, fwhm, fwhme) in enumerate( - zip(fwhm_peaks, fit_fwhms, fit_dfwhms) - ): - log.info( - f"\t{i}".ljust(4) - + str(peak).ljust(9) - + f"| {fwhm:.2f}+-{fwhme:.2f} ".ljust(5) - +f"| {fwhm_linear.func(peak, 
*self.fwhm_fit_linear['pars']):.2f}".ljust(5) - ) - + zip(fwhm_peaks, fit_fwhms, fit_dfwhms) + ): + log.info( + f"\t{i}".ljust(4) + + str(peak).ljust(9) + + f"| {fwhm:.2f}+-{fwhme:.2f} ".ljust(5) + + f"| {fwhm_linear.func(peak, *self.fwhm_fit_linear['parameters']):.2f}".ljust( + 5 + ) + ) + log.info( f"FWHM energy resolution at Qbb (linear fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" - ) + ) except RuntimeError: log.error(f"FWHM linear fit failed for {self.energy_param}") pars, errs, cov = return_nans(fwhm_linear.func) - self.fwhm_fit_linear = {"function":fwhm_linear.__name__, - "module":fwhm_linear.__module__, - "expression":fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)":np.nan, - "pars":pars, - "errors":errs, - "cov":cov, - "csqr":(np.nan, np.nan), - "p_val":0} + self.fwhm_fit_linear = { + "function": fwhm_linear.__name__, + "module": fwhm_linear.__module__, + "expression": fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)": np.nan, + "parameters": pars, + "uncertainties": errs, + "cov": cov, + "csqr": (np.nan, np.nan), + "p_val": 0, + } log.error("FWHM linear fit failed to converge") try: if 2614.50 not in fwhm_peaks: @@ -299,51 +305,65 @@ def fit_energy_res(self): fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_quadratic.func ) c_quad.loss = "soft_l1" - m_quad = Minuit(c_quad, *fwhm_quadratic.guess(fwhm_peaks, fit_fwhms, fit_dfwhms)) + m_quad = Minuit( + c_quad, *fwhm_quadratic.guess(fwhm_peaks, fit_fwhms, fit_dfwhms) + ) m_quad.limits = fwhm_quadratic.bounds() m_quad.simplex() m_quad.migrad() m_quad.hesse() - + rng = np.random.default_rng(1) - pars_b = rng.multivariate_normal(m_quad.values, - m_quad.covariance, size=1000) - fits = np.array([fwhm_quadratic.func(fwhm_peaks, *par_b) for par_b in pars_b]) - qbb_vals = np.array([fwhm_quadratic.func(2039.0, *par_b) for par_b in pars_b]) + pars_b = rng.multivariate_normal( + m_quad.values, m_quad.covariance, size=1000 + ) + fits = np.array( + [fwhm_quadratic.func(fwhm_peaks, *par_b) for par_b in pars_b] + ) + qbb_vals = np.array( + [fwhm_quadratic.func(2039.0, *par_b) for par_b in pars_b] + ) qbb_err = np.nanstd(qbb_vals) predicted_fwhms = fwhm_quadratic.func(fwhm_peaks, *m_quad.values) fit_qbb = fwhm_quadratic.func(2039.0, *m_quad.values) - - p_val = scipy.stats.chi2.sf(m_quad.fval, len(fwhm_peaks)-len(m_quad.values)) - - self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, - "module":fwhm_quadratic.__module__, - "expression":fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)":qbb_err, - "pars":m_quad.values, - "errors":m_quad.errors, - "cov":m_quad.covariance, - "csqr": (m_quad.fval, len(fwhm_peaks)-len(m_quad.values)), - "p_val":p_val - } - log.info(f'FWHM quadratic fit: {self.fwhm_fit_quadratic["pars"].to_dict()}') + + p_val = scipy.stats.chi2.sf( + m_quad.fval, len(fwhm_peaks) - len(m_quad.values) + ) + + self.fwhm_fit_quadratic = { + "function": fwhm_quadratic.__name__, + "module": fwhm_quadratic.__module__, + "expression": fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)": qbb_err, + "parameters": m_quad.values, + "uncertainties": m_quad.errors, + "cov": m_quad.covariance, + "csqr": (m_quad.fval, len(fwhm_peaks) - len(m_quad.values)), + "p_val": p_val, + } + log.info( + f'FWHM quadratic fit: {self.fwhm_fit_quadratic["parameters"].to_dict()}' + ) log.info( f"FWHM energy resolution at Qbb (quadratic fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" - ) + ) except RuntimeError: log.error(f"FWHM quadratic fit failed for 
{self.energy_param}") pars, errs, cov = return_nans(fwhm_quadratic.func) - self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, - "module":fwhm_quadratic.__module__, - "expression":fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)":np.nan, - "pars":pars, - "errors":errs, - "cov":cov, - "csqr":(np.nan, np.nan), - "p_val":0} + self.fwhm_fit_quadratic = { + "function": fwhm_quadratic.__name__, + "module": fwhm_quadratic.__module__, + "expression": fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)": np.nan, + "parameters": pars, + "uncertainties": errs, + "cov": cov, + "csqr": (np.nan, np.nan), + "p_val": 0, + } log.error("FWHM quadratic fit failed to converge") def gen_pars_dict(self): @@ -373,63 +393,69 @@ def get_results_dict(self, data): return {} else: fwhm_linear = self.fwhm_fit_linear.copy() - fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() - fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() fwhm_linear["cov"] = fwhm_linear["cov"].tolist() fwhm_quad = self.fwhm_fit_quadratic.copy() - fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() - fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - pk_dict = {Ei:{"function":func_i.__name__, - "module":func_i.__module__, - "pars(uncal)":parsi.to_dict(), - "errs(uncal)":errorsi.to_dict(), - "p_val": pvali, - "fwhm (keV)": list(fwhmi)} - for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( - zip(self.results["fitted_keV"], - self.results["pk_pars"][self.results["pk_validities"]], - self.results["pk_errors"][self.results["pk_validities"]], - self.results["pk_pvals"][self.results["pk_validities"]], - self.results["pk_fwhms"], - self.funcs) - )} + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_ADC": parsi.to_dict(), + "uncertainties_in_ADC": errorsi.to_dict(), + "p_val": pvali, + "fwhm_in_keV": list(fwhmi), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip( + self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs, + ) + ) + } return { - "total_fep": len( - data.query( - f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624" - ) - ), - "total_dep": len( - data.query( - f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597" - ) - ), - "pass_fep": len( - data.query( - f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624&{self.selection_string}" - ) - ), - "pass_dep": len( - data.query( - f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597&{self.selection_string}" - ) - ), - "eres_linear": fwhm_linear, - "eres_quadratic":fwhm_quad, - "fitted_peaks": self.results["fitted_keV"].tolist(), - "pk_fits":pk_dict - } + "total_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624" + ) + ), + "total_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597" + ) + ), + "pass_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624&{self.selection_string}" + ) + ), + "pass_dep": len( + 
data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597&{self.selection_string}" + ) + ), + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, + } def calibrate_parameter(self, data): kev_ranges = self.range_keV.copy() if self.guess_keV is None: self.guess_keV = 2620 / np.nanpercentile( - data.query(f"{self.selection_string} & {self.energy_param}>{self.threshold}")[ - self.energy_param - ], + data.query( + f"{self.selection_string} & {self.energy_param}>{self.threshold}" + )[self.energy_param], 99, ) @@ -457,8 +483,8 @@ def calibrate_parameter(self, data): if self.pars is None: raise ValueError - for i, peak in enumerate(self.results["got_peaks_keV"]): - idx = np.where(peak ==self.glines)[0][0] + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak == self.glines)[0][0] self.funcs[idx] = fitted_funcs[i] if fitted_funcs[i] == pgf.extended_radford_pdf: self.gof_funcs[idx] = pgf.radford_pdf @@ -469,7 +495,10 @@ def calibrate_parameter(self, data): fitted_peaks = np.array([]) fitted_funcs = np.array([]) - if len(fitted_peaks) != len(self.glines) or self.gof_funcs[-1]==pgf.gauss_step_pdf: + if ( + len(fitted_peaks) != len(self.glines) + or self.gof_funcs[-1] == pgf.gauss_step_pdf + ): if self.glines[-1] in fitted_peaks: if fitted_funcs[-1] == pgf.extended_gauss_step_pdf: self.funcs = [pgf.extended_gauss_step_pdf for entry in self.glines] @@ -489,7 +518,10 @@ def calibrate_parameter(self, data): > 0.05 ): index = np.where(self.glines == peak)[0][0] - kev_ranges[i] = (kev_ranges[index][0] - 5, kev_ranges[index][1] - 5) + kev_ranges[i] = ( + kev_ranges[index][0] - 5, + kev_ranges[index][1] - 5, + ) except: pass @@ -513,8 +545,8 @@ def calibrate_parameter(self, data): log.debug("Calibrated found") log.info(f"Calibration pars are {self.pars}") - for i, peak in enumerate(self.results["got_peaks_keV"]): - idx = np.where(peak ==self.glines)[0][0] + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak == self.glines)[0][0] self.funcs[idx] = fitted_funcs[i] if fitted_funcs[i] == pgf.extended_radford_pdf: self.gof_funcs[idx] = pgf.radford_pdf @@ -522,23 +554,19 @@ def calibrate_parameter(self, data): self.gof_funcs[idx] = pgf.gauss_step_pdf if self.pars is None: raise ValueError - + except: self.pars = np.full(self.deg + 1, np.nan) self.results = None - - log.error( - f"Calibration failed completely for {self.energy_param}" - ) + + log.error(f"Calibration failed completely for {self.energy_param}") else: log.debug("Calibrated found") log.info(f"Calibration pars are {self.pars}") if ~np.isnan(self.pars).all(): self.fit_energy_res() self.hit_dict[self.cal_energy_param] = self.gen_pars_dict() - data[f"{self.energy_param}_cal"] = pgf.poly( - data[self.energy_param], self.pars - ) + data[f"{self.energy_param}_cal"] = pgf.poly(data[self.energy_param], self.pars) def fill_plot_dict(self, data, plot_dict={}): for key, item in self.plot_options.items(): @@ -549,7 +577,6 @@ def fill_plot_dict(self, data, plot_dict={}): return plot_dict - class high_stats_fitting(calibrate_parameter): glines = [ 238.632, @@ -568,11 +595,11 @@ class high_stats_fitting(calibrate_parameter): 2614.50, 3125, 3198, - 3474 - ] # gamma lines used for calibration + 3474, + ] # gamma lines used for calibration range_keV = [ (10, 10), - (30,30), + (30, 30), (30, 30), (30, 30), (30, 15), @@ -588,9 +615,28 @@ class high_stats_fitting(calibrate_parameter): (30, 30), (30, 30), (30, 30), - ] # 
side bands width + ] # side bands width + binning = [ + 0.02, + 0.02, + 0.02, + 0.02, + 0.2, + 0.2, + 0.02, + 0.2, + 0.2, + 0.2, + 0.1, + 0.1, + 0.1, + 0.02, + 0.2, + 0.2, + 0.2, + ] funcs = [ - pgf.extended_gauss_step_pdf, #probably should be gauss on exp + pgf.extended_gauss_step_pdf, # probably should be gauss on exp pgf.extended_gauss_step_pdf, pgf.extended_radford_pdf, pgf.extended_radford_pdf, @@ -629,8 +675,16 @@ class high_stats_fitting(calibrate_parameter): pgf.gauss_step_pdf, ] - def __init__(self, energy_param, selection_string, threshold, p_val, - plot_options={}, simplex=False): + def __init__( + self, + energy_param, + selection_string, + threshold, + p_val, + plot_options={}, + simplex=False, + tail_weight=20, + ): self.energy_param = energy_param self.cal_energy_param = energy_param self.selection_string = selection_string @@ -640,69 +694,87 @@ def __init__(self, energy_param, selection_string, threshold, p_val, self.simplex = simplex self.results = {} self.plot_dict = {} - self.n_events=None + self.n_events = None self.output_dict = {} - self.pars=[1,0] - + self.pars = [1, 0] + self.tail_weight = tail_weight + def get_results_dict(self, data): if self.results: fwhm_linear = self.fwhm_fit_linear.copy() - fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() - fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() fwhm_linear["cov"] = fwhm_linear["cov"].tolist() fwhm_quad = self.fwhm_fit_quadratic.copy() - fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() - fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - - pk_dict = {Ei:{"function":func_i.__name__, - "module":func_i.__module__, - "pars(cal)":parsi.to_dict(), - "errs(cal)":errorsi.to_dict(), + + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_keV": parsi.to_dict(), + "uncertainties_in_keV": errorsi.to_dict(), "p_val": pvali, - "fwhm (keV)": list(fwhmi)} - for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( - zip(self.results["fitted_keV"], - self.results["pk_pars"][self.results["pk_validities"]], - self.results["pk_errors"][self.results["pk_validities"]], - self.results["pk_pvals"][self.results["pk_validities"]], - self.results["pk_fwhms"], - self.funcs) - )} - + "fwhm_in_keV": list(fwhmi), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip( + self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs, + ) + ) + } + return { - "eres_linear": fwhm_linear, - "eres_quadratic":fwhm_quad, - "fitted_peaks": self.results["fitted_keV"].tolist(), - "pk_fits":pk_dict + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, } else: return {} - def fit_peaks(self, data): log.debug(f"Fitting {self.energy_param}") try: - n_bins = [int((self.range_keV[i][1]+self.range_keV[i][0]) /0.2) for i in range(len(self.glines))] - pk_pars, pk_errors, pk_covs, pk_binws, pk_ranges, pk_pvals, valid_pks, pk_funcs = cal.hpge_fit_E_peaks( - data.query(self.selection_string)[self.energy_param], - 
self.glines, - self.range_keV, - n_bins=n_bins, - funcs=self.funcs, - method="unbinned", - gof_funcs=self.gof_funcs, - n_events=None, - allowed_p_val=self.p_val - ) - for idx, peak in enumerate(self.glines): - #idx = np.where(peak ==self.glines)[0][0] + n_bins = [ + int((self.range_keV[i][1] + self.range_keV[i][0]) / self.binning[i]) + for i in range(len(self.glines)) + ] + ( + pk_pars, + pk_errors, + pk_covs, + pk_binws, + pk_ranges, + pk_pvals, + valid_pks, + pk_funcs, + ) = cal.hpge_fit_E_peaks( + data.query(self.selection_string)[self.energy_param], + self.glines, + self.range_keV, + n_bins=n_bins, + funcs=self.funcs, + method="unbinned", + gof_funcs=self.gof_funcs, + n_events=None, + allowed_p_val=self.p_val, + tail_weight=self.tail_weight, + ) + for idx, peak in enumerate(self.glines): self.funcs[idx] = pk_funcs[idx] if pk_funcs[idx] == pgf.extended_radford_pdf: self.gof_funcs[idx] = pgf.radford_pdf else: - self.gof_funcs[idx] = pgf.gauss_step_pdf - + self.gof_funcs[idx] = pgf.gauss_step_pdf + self.results["got_peaks_keV"] = self.glines self.results["pk_pars"] = pk_pars self.results["pk_errors"] = pk_errors @@ -710,13 +782,15 @@ def fit_peaks(self, data): self.results["pk_binws"] = pk_binws self.results["pk_ranges"] = pk_ranges self.results["pk_pvals"] = pk_pvals - for i, pk in enumerate(self.results["got_peaks_keV"]): try: - if self.results["pk_pars"][i]["n_sig"]<10: + if self.results["pk_pars"][i]["n_sig"] < 10: valid_pks[i] = False - elif 2*self.results["pk_errors"][i]["n_sig"]>self.results["pk_pars"][i]["n_sig"]: + elif ( + 2 * self.results["pk_errors"][i]["n_sig"] + > self.results["pk_pars"][i]["n_sig"] + ): valid_pks[i] = False except: pass @@ -724,16 +798,16 @@ def fit_peaks(self, data): self.results["pk_validities"] = valid_pks # Drop failed fits - fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[valid_pks] + fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[ + valid_pks + ] pk_pars = np.asarray(pk_pars, dtype=object)[valid_pks] # ragged - pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] + pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] pk_covs = np.asarray(pk_covs, dtype=object)[valid_pks] pk_binws = np.asarray(pk_binws)[valid_pks] pk_ranges = np.asarray(pk_ranges)[valid_pks] pk_pvals = np.asarray(pk_pvals)[valid_pks] pk_funcs = np.asarray(pk_funcs)[valid_pks] - - log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( @@ -778,7 +852,6 @@ def fit_peaks(self, data): except: self.results = {} log.debug(f"high stats fitting failed") - def get_peak_labels( @@ -814,7 +887,9 @@ def get_peak_label(peak: float) -> str: return "" -def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, binning_keV=5): +def plot_fits( + ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, binning_keV=5 +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -828,7 +903,6 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, if peak in fitted_peaks: fitted_gof_funcs.append(ecal_class.gof_funcs[i]) - mus = [ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_pars) @@ -841,7 +915,7 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, range_adu = 5 / der[i] plt.subplot(nrows, ncols, i + 1) try: - binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 0.1/ der[i]) + binning = np.arange(pk_ranges[i][0],
pk_ranges[i][1], 0.1 / der[i]) bin_cs = (binning[1:] + binning[:-1]) / 2 energies = data.query( f"{ecal_class.energy_param}>{pk_ranges[i][0]}&{ecal_class.energy_param}<{pk_ranges[i][1]}&{ecal_class.selection_string}" @@ -850,7 +924,9 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, counts, bs, bars = plt.hist(energies, bins=binning, histtype="step") if pk_pars[i] is not None: - fit_vals = fitted_gof_funcs[i](bin_cs, *pk_pars[i][:-1], 0) * np.diff(bs)[0] + fit_vals = ( + fitted_gof_funcs[i](bin_cs, *pk_pars[i][:-1], 0) * np.diff(bs)[0] + ) plt.plot(bin_cs, fit_vals) plt.step( bin_cs, @@ -861,7 +937,9 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, ) plt.annotate( - get_peak_label(fitted_peaks[i]), (0.02, 0.9), xycoords="axes fraction" + get_peak_label(fitted_peaks[i]), + (0.02, 0.9), + xycoords="axes fraction", ) plt.annotate( f"{fitted_peaks[i]:.1f} keV", (0.02, 0.8), xycoords="axes fraction" @@ -885,7 +963,13 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, def plot_2614_timemap( - ecal_class, data, figsize=[12, 8], fontsize=12, erange=[2580, 2630], dx=1, time_dx=180 + ecal_class, + data, + figsize=[12, 8], + fontsize=12, + erange=[2580, 2630], + dx=1, + time_dx=180, ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -945,9 +1029,7 @@ def plot_pulser_timemap( else: mean = np.nanpercentile(selection[ecal_class.cal_energy_param], 50) - spread = mean - np.nanpercentile( - selection[ecal_class.cal_energy_param], 10 - ) + spread = mean - np.nanpercentile(selection[ecal_class.cal_energy_param], 10) plt.hist2d( selection["timestamp"], @@ -1150,31 +1232,59 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz qbb_line_vx = [2039.0, 2039.0] qbb_line_vy = [ - 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), - np.nanmax([ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"],ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]]) + 0.9 + * np.nanmin( + fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]) + ), + np.nanmax( + [ + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ] + ), ] qbb_line_hx = [erange[0], 2039.0] ax1.plot( - fwhm_slope_bins, fwhm_linear.func(fwhm_slope_bins, - *ecal_class.fwhm_fit_linear["pars"]), lw=1, c="g", - label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV' + fwhm_slope_bins, + fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), + lw=1, + c="g", + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV', + ) + ax1.plot( + fwhm_slope_bins, + fwhm_quadratic.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_quadratic["parameters"] + ), + lw=1, + c="b", + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV', ) ax1.plot( - fwhm_slope_bins, fwhm_quadratic.func(fwhm_slope_bins, - *ecal_class.fwhm_fit_quadratic["pars"]), lw=1, c="b", - label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV' + qbb_line_hx, + [ + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ], + lw=1, + c="r", + ls="--", + ) + ax1.plot( 
+ qbb_line_hx, + [ + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ], + lw=1, + c="r", + ls="--", ) - ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]], - lw=1, c="r", ls="--") - ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]], - lw=1, c="r", ls="--") ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--") ax1.legend(loc="upper left", frameon=False) - if np.isnan(ecal_class.fwhm_fit_linear["pars"]).all(): + if np.isnan(ecal_class.fwhm_fit_linear["parameters"]).all(): [ 0.9 * np.nanmin(fit_fwhms), 1.1 * np.nanmax(fit_fwhms), @@ -1182,27 +1292,47 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz else: ax1.set_ylim( [ - 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), - 1.1 * np.nanmax(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), + 0.9 + * np.nanmin( + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) + ), + 1.1 + * np.nanmax( + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) + ), ] ) ax1.set_xlim(erange) ax1.set_ylabel("FWHM energy resolution (keV)") ax2.plot( fwhm_peaks, - (fit_fwhms - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["pars"])) / fit_dfwhms, + ( + fit_fwhms + - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"]) + ) + / fit_dfwhms, lw=0, marker="x", c="g", ) ax2.plot( fwhm_peaks, - (fit_fwhms - fwhm_quadratic.func(fwhm_peaks, *ecal_class.fwhm_fit_quadratic["pars"])) / fit_dfwhms, + ( + fit_fwhms + - fwhm_quadratic.func( + fwhm_peaks, *ecal_class.fwhm_fit_quadratic["parameters"] + ) + ) + / fit_dfwhms, lw=0, marker="x", c="b", ) - ax2.plot(erange,[0,0], color="black",lw=0.5) + ax2.plot(erange, [0, 0], color="black", lw=0.5) ax2.set_xlabel("Energy (keV)") ax2.set_ylabel("Normalised Residuals") plt.tight_layout() @@ -1218,9 +1348,7 @@ def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - data.query("~is_valid_cal&is_not_pulser")[ - ecal_class.cal_energy_param - ], + data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], bins, )[0], "pulser_counts": np.histogram( @@ -1236,9 +1364,7 @@ def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): bins=np.arange(erange[0], erange[1] + dx, dx), ) counts_fail, bins_fail, _ = pgh.get_hist( - data.query("~is_valid_cal&is_not_pulser")[ - ecal_class.cal_energy_param - ], + data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) sf = 100 * (counts_pass + 10 ** (-6)) / (counts_pass + counts_fail + 10 ** (-6)) @@ -1247,7 +1373,7 @@ def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): def energy_cal_th( files: list[str], - energy_params: list[str] , + energy_params: list[str], lh5_path: str = "dsp", hit_dict: dict = {}, cut_parameters: dict[str, int] = {"bl_mean": 4, "bl_std": 4, "pz_std": 4}, @@ -1260,12 +1386,11 @@ def energy_cal_th( guess_keV: float | None = None, deg: int = 1, ) -> tuple(dict, dict, dict, dict): - data = load_data( files, lh5_path, hit_dict, - params = energy_params + list(cut_parameters) + ["timestamp"] + params=energy_params + list(cut_parameters) + ["timestamp"], ) data, hit_dict = apply_cuts(data, hit_dict, 
cut_parameters, final_cut_field) @@ -1274,7 +1399,7 @@ def energy_cal_th( plot_dict = {} full_object_dict = {} for energy_param in energy_params: - ecal = calibrate_parameter( + full_object_dict[energy_param] = calibrate_parameter( energy_param, f"{final_cut_field}&is_not_pulser", plot_options, @@ -1285,13 +1410,15 @@ def energy_cal_th( simplex, deg, ) - ecal.calibrate_parameter(data) - results_dict[ecal.cal_energy_param] = ecal.get_results_dict(data) - hit_dict.update(ecal.hit_dict) - full_object_dict[ecal.cal_energy_param] = ecal - if ~np.isnan(ecal.pars).all(): - plot_dict[ecal.cal_energy_param] = ecal.fill_plot_dict(data) - + full_object_dict[energy_param].calibrate_parameter(data) + results_dict[ + full_object_dict[energy_param].cal_energy_param + ] = full_object_dict[energy_param].get_results_dict(data) + hit_dict.update(full_object_dict[energy_param].hit_dict) + if ~np.isnan(full_object_dict[energy_param].pars).all(): + plot_dict[full_object_dict[energy_param].cal_energy_param] = ( + full_object_dict[energy_param].fill_plot_dict(data).copy() + ) log.info(f"Finished all calibrations") return hit_dict, results_dict, plot_dict, full_object_dict @@ -1308,32 +1435,36 @@ def partition_energy_cal_th( n_events: int = None, final_cut_field: str = "is_valid_cal", simplex: bool = True, + tail_weight: int = 20, ) -> tuple(dict, dict, dict, dict): - data = load_data( files, lh5_path, hit_dict, - params = energy_params + [final_cut_field] + ["timestamp"] + params=energy_params + [final_cut_field] + ["timestamp"], ) results_dict = {} plot_dict = {} full_object_dict = {} for energy_param in energy_params: - ecal = high_stats_fitting( + full_object_dict[energy_param] = high_stats_fitting( energy_param, f"{final_cut_field}&is_not_pulser", threshold, p_val, plot_options, simplex, + tail_weight, + ) + full_object_dict[energy_param].fit_peaks(data) + results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( + data ) - ecal.fit_peaks(data) - results_dict[energy_param] = ecal.get_results_dict(data) - full_object_dict[energy_param] = ecal if ecal.results: - plot_dict[energy_param] = ecal.fill_plot_dict(data) + plot_dict[energy_param] = ( + full_object_dict[energy_param].fill_plot_dict(data).copy() + ) log.info(f"Finished all calibrations") - return results_dict, plot_dict, full_object_dict \ No newline at end of file + return results_dict, plot_dict, full_object_dict diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 8a54f3bfd..293d8f30d 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -264,7 +264,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): or func == pgf.extended_gauss_step_pdf ): # get mu and height from a gauss fit, also sigma as fallback - pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10) + pars, cov = pgf.gauss_mode_width_max( + hist, bins, var, mode_guess=mode_guess, n_bins=10 + ) bin_centres = pgh.get_bin_centers(bins) if pars is None: log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed") @@ -273,7 +275,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): height = hist[i_0] sigma_guess = None else: - mu = pars[0] + mu = mode_guess sigma_guess = pars[1] height = pars[2] @@ -319,9 +321,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): hstep = step / (bg + np.mean(hist[:10])) parguess = [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0] - for i, guess in enumerate(parguess): + for i, guess 
in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess @@ -331,7 +333,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): or func == pgf.extended_radford_pdf ): # guess mu, height - pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10) + pars, cov = pgf.gauss_mode_width_max( + hist, bins, var, mode_guess=mode_guess, n_bins=10 + ) bin_centres = pgh.get_bin_centers(bins) if pars is None: log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed") @@ -393,9 +397,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): parguess = [n_sig, mu, sigma, htail, tau, n_bkg, hstep, bins[0], bins[-1], 0] - for i, guess in enumerate(parguess): + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess @@ -472,48 +476,65 @@ def get_hpge_E_bounds(func, parguess): log.error(f"get_hpge_E_bounds not implemented for {func.__name__}") return [] + class tail_prior: """ Generic least-squares cost function with error. """ - verbose=0 + + verbose = 0 errordef = Minuit.LIKELIHOOD # for Minuit to compute errors correctly - def __init__(self, data, model): + def __init__(self, data, model, tail_weight=100): self.model = model # model predicts y for given x - self.data=data - #self.x = np.asarray(x) - + self.data = data + self.tail_weight = tail_weight + def _call(self, *pars): - return self.__call__( *pars[0]) + return self.__call__(*pars[0]) + + def __call__( + self, + n_sig, + mu, + sigma, + htail, + tau, + n_bkg, + hstep, + lower_range, + upper_range, + components, + ): + return self.tail_weight * np.log(htail + 0.1) # len(self.data)/ - def __call__(self, n_sig, mu, sigma, htail, - tau, n_bkg, hstep, - lower_range ,upper_range, components): - return 100 * np.log(htail+0.1) #len(self.data)/ -def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess): +def staged_fit( + energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess, tail_weight=100 +): par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i, mode_guess) bounds = get_hpge_E_bounds(func_i, par_guesses) - fixed, mask = get_hpge_E_fixed(func_i) - + fixed, mask = get_hpge_E_fixed(func_i) + if func_i == pgf.extended_radford_pdf or func_i == pgf.radford_pdf: - cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) +tail_prior(energies, func_i) + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) + tail_prior( + energies, func_i, tail_weight=tail_weight + ) m = Minuit(cost_func, *par_guesses) m.limits = bounds for fix in fixed: m.fixed[fix] = True - + m.values["htail"] = 0 m.values["tau"] = 0 - m.fixed["htail"] = True - m.fixed["tau"] = True + m.fixed["htail"] = True + m.fixed["tau"] = True if simplex == True: m.simplex().migrad() else: m.migrad() try: - #set htail to guess + # set htail to guess m.values["htail"] = par_guesses[3] m.values["tau"] = par_guesses[4] m.fixed = False @@ -534,16 +555,22 @@ def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_gues except: func_i = pgf.extended_gauss_step_pdf gof_func_i = pgf.gauss_step_pdf - pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, - func_i, gof_func_i, simplex, mode_guess) - - #check htail - if m.values["htail"]<0.01 or m.values["htail"]<2*m.errors["htail"] or np.isnan(m.values).any():# or + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit( + energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess + ) 
+ + # check htail + if ( + m.values["htail"] < 0.01 + or m.values["htail"] < 2 * m.errors["htail"] + or np.isnan(m.values).any() + ): # or func_i = pgf.extended_gauss_step_pdf gof_func_i = pgf.gauss_step_pdf - pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, - func_i, gof_func_i, simplex, mode_guess) - + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit( + energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess + ) + else: cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) m = Minuit(cost_func, *par_guesses) @@ -553,8 +580,8 @@ def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_gues if simplex == True: m.simplex().migrad() else: - m.migrad() - + m.migrad() + m.hesse() pars_i = m.values @@ -565,6 +592,7 @@ def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_gues return pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit + def hpge_fit_E_peaks( E_uncal, mode_guesses, @@ -577,6 +605,7 @@ def hpge_fit_E_peaks( allowed_p_val=0.05, uncal_is_int=False, simplex=False, + tail_weight=100, ): """Fit the Energy peaks specified using the function given @@ -615,14 +644,14 @@ def hpge_fit_E_peaks( ranges: list of array a list of [Euc_min, Euc_max] used for each peak fit """ - pars = np.zeros(len(mode_guesses), dtype='object') - errors = np.zeros(len(mode_guesses), dtype='object') - covs = np.zeros(len(mode_guesses), dtype='object') + pars = np.zeros(len(mode_guesses), dtype="object") + errors = np.zeros(len(mode_guesses), dtype="object") + covs = np.zeros(len(mode_guesses), dtype="object") binws = np.zeros(len(mode_guesses)) - ranges = np.zeros(len(mode_guesses), dtype='object') + ranges = np.zeros(len(mode_guesses), dtype="object") p_vals = np.zeros(len(mode_guesses)) - valid_pks = np.zeros(len(mode_guesses),dtype=bool) - out_funcs= np.zeros(len(mode_guesses), dtype='object') + valid_pks = np.zeros(len(mode_guesses), dtype=bool) + out_funcs = np.zeros(len(mode_guesses), dtype="object") for i_peak, mode_guess in enumerate(mode_guesses): # get args for this peak @@ -642,7 +671,7 @@ def hpge_fit_E_peaks( # bin a histogram Euc_min = mode_guesses[i_peak] - wleft_i Euc_max = mode_guesses[i_peak] + wright_i - if uncal_is_int ==True: + if uncal_is_int == True: Euc_min, Euc_max, n_bins_i = pgh.better_int_binning( x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i ) @@ -653,10 +682,28 @@ def hpge_fit_E_peaks( energies, bins=n_bins_i, range=(Euc_min, Euc_max) ) if func_i == pgf.extended_radford_pdf or pgf.extended_gauss_step_pdf: - pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, - func_i, gof_func_i, simplex, mode_guess) + ( + pars_i, + errs_i, + cov_i, + func_i, + gof_func_i, + mask, + valid_fit, + ) = staged_fit( + energies, + hist, + bins, + var, + func_i, + gof_func_i, + simplex, + mode_guess, + tail_weight=tail_weight, + ) + if pars_i["n_sig"] < 20: + valid_fit = False else: - par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) bounds = get_hpge_E_bounds(func_i, par_guesses) fixed, mask = get_hpge_E_fixed(func_i) @@ -678,7 +725,13 @@ def hpge_fit_E_peaks( valid_fit = m.valid csqr = pgf.goodness_of_fit( - hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=True + hist, + bins, + None, + gof_func_i, + pars_i, + method="Pearson", + scale_bins=True, ) else: @@ -700,25 +753,35 @@ def hpge_fit_E_peaks( simplex=simplex, bounds=bounds, ) - valid_fit=True + valid_fit = True csqr = pgf.goodness_of_fit( - 
hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=False + hist, + bins, + None, + gof_func_i, + pars_i, + method="Pearson", + scale_bins=False, ) - + if np.isnan(pars_i).any(): log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, par is nan : {pars_i}" ) - raise RuntimeError + raise RuntimeError - p_val = scipy.stats.chi2.sf(csqr[0], csqr[1]+ len(np.where(mask)[0])) + p_val = scipy.stats.chi2.sf(csqr[0], csqr[1] + len(np.where(mask)[0])) total_events = pgf.get_total_events_func(func_i, pars_i, errors=errs_i) if ( - sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == np.inf - or sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == 0 - or np.isnan(sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask])) + sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask]) + == np.inf + or sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask]) + == 0 + or np.isnan( + sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask]) + ) ): log.debug( f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" @@ -732,8 +795,9 @@ def hpge_fit_E_peaks( ) valid_pks[i_peak] = False - elif ((np.abs(np.array(errs_i)[mask] / np.array(pars_i)[mask]) < 1e-7).any() - or np.isnan(np.array(errs_i)[mask]).any()): + elif ( + np.abs(np.array(errs_i)[mask] / np.array(pars_i)[mask]) < 1e-7 + ).any() or np.isnan(np.array(errs_i)[mask]).any(): log.debug( f"hpge_fit_E_peaks: failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" ) @@ -758,33 +822,24 @@ def hpge_fit_E_peaks( except: log.debug( - f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" - ) + f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" + ) valid_pks[i_peak] = False - pars_i, errs_i, cov_i = return_nans(func_i)#None, None, None, None + pars_i, errs_i, cov_i = return_nans(func_i) # None, None, None, None p_val = 0 # get binning binw_1 = (bins[-1] - bins[0]) / (len(bins) - 1) - pars[i_peak] = pars_i + pars[i_peak] = pars_i errors[i_peak] = errs_i covs[i_peak] = cov_i - binws[i_peak] =binw_1 - ranges[i_peak] =[Euc_min, Euc_max] - p_vals[i_peak] =p_val - out_funcs[i_peak] =func_i - - return ( - pars, - errors, - covs, - binws, - ranges, - p_vals, - valid_pks, - out_funcs - ) + binws[i_peak] = binw_1 + ranges[i_peak] = [Euc_min, Euc_max] + p_vals[i_peak] = p_val + out_funcs[i_peak] = func_i + + return (pars, errors, covs, binws, ranges, p_vals, valid_pks, out_funcs) def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): @@ -1011,7 +1066,7 @@ def hpge_E_calibration( range_keV = [range_keV[i] for i in idx] funcs = [funcs[i] for i in idx] gof_funcs = [gof_funcs[i] for i in idx] - + # Drop peaks to not be fitted tmp = zip( *[ @@ -1071,7 +1126,7 @@ def hpge_E_calibration( pk_ranges, pk_pvals, valid_pks, - pk_funcs + pk_funcs, ) = hpge_fit_E_peaks( E_uncal, got_peaks_locs, diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 3bcdb5e0a..b82b39afc 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -1038,12 +1038,21 @@ def event_selection( e_upper_lim = peak_loc + (1.5 * kev_width[1]) / rough_adc_to_kev e_ranges = (int(peak_loc - e_lower_lim), int(e_upper_lim - peak_loc)) - params, errors, covs, bins, ranges, p_val, valid_pks, pk_funcs = pgc.hpge_fit_E_peaks( + ( + params, + errors, + covs, + bins, + ranges, + p_val, + valid_pks, + pk_funcs, + ) = 
pgc.hpge_fit_E_peaks( energy, [peak_loc], [e_ranges], n_bins=(np.nanmax(energy) - np.nanmin(energy)) // 1, - uncal_is_int=True + uncal_is_int=True, ) if params[0] is None or np.isnan(params[0]).any(): log.debug("Fit failed, using max guess") diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 86d1d94ba..e39dc255e 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -1,16 +1,18 @@ from __future__ import annotations -import numpy as np -import pandas as pd import logging -from iminuit import Minuit, cost, util from types import FunctionType import lgdo.lh5_store as lh5 +import numpy as np +import pandas as pd +from iminuit import Minuit, cost, util + import pygama.pargen.cuts as cts log = logging.getLogger(__name__) + def return_nans(input): if isinstance(input, FunctionType): args = input.__code__.co_varnames[: input.__code__.co_argcount][1:] @@ -23,6 +25,7 @@ def return_nans(input): m = Minuit(c, *[np.nan for arg in args]) return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) + def tag_pulser(files, lh5_path): pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") @@ -43,6 +46,7 @@ def tag_pulser(files, lh5_path): log.debug(f"no pulser found") return ids + def get_params(file_params, param_list): out_params = [] if isinstance(file_params, dict): @@ -54,17 +58,15 @@ def get_params(file_params, param_list): if key in param: out_params.append(key) return np.unique(out_params).tolist() - + def load_data( files: list, lh5_path: str, cal_dict: dict, - params = [ - "cuspEmax" - ], - cal_energy_param: str="cuspEmax_ctc_cal", - threshold = None + params=["cuspEmax"], + cal_energy_param: str = "cuspEmax_ctc_cal", + threshold=None, ) -> tuple(np.array, np.array, np.array, np.array): """ Loads in the A/E parameters needed and applies calibration constants to energy @@ -75,7 +77,7 @@ def load_data( if isinstance(files, dict): df = [] all_files = [] - masks=np.array([],dtype=bool) + masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read_object(lh5_path, tfiles)[0] if tstamp in cal_dict: @@ -85,12 +87,12 @@ def load_data( file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("timestamp") if threshold is not None: - mask = file_df[cal_energy_param]<threshold + masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) else: - masks = np.ones(len(df),dtype=bool) + masks = np.ones(len(df), dtype=bool) all_files = files - if lh5_path[-1] != "/": lh5_path+='/' + if lh5_path[-1] != "/": + lh5_path += "/" keys = lh5.ls(all_files[0], lh5_path) keys = [key.split("/")[-1] for key in keys] - params = get_params(keys+list(df.keys()), params) + params = get_params(keys + list(df.keys()), params) ids = tag_pulser(all_files, lh5_path) df["is_not_pulser"] = ids[masks] @@ -124,4 +127,4 @@ def load_data( if param not in df: df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") - return df \ No newline at end of file + return df From 0c6ddbe0d6178efda2fa205b3f9bac1c7943e6ef Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Oct 2023 17:26:58 +0200 Subject: [PATCH 010/191] bugfix for partition ecal --- src/pygama/pargen/ecal_th.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index acc8ed77c..9613a054d 100644 --- a/src/pygama/pargen/ecal_th.py +++
b/src/pygama/pargen/ecal_th.py @@ -1461,7 +1461,7 @@ def partition_energy_cal_th( results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( data ) - if ecal.results: + if results_dict[energy_param].results: plot_dict[energy_param] = ( full_object_dict[energy_param].fill_plot_dict(data).copy() ) From 64a45a11ec651d8727e6ecb4e39671b53da1101e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Oct 2023 17:34:29 +0200 Subject: [PATCH 011/191] fixed pars to params to eres fit --- src/pygama/pargen/ecal_th.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 9613a054d..ffbf37633 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -264,7 +264,7 @@ def fit_energy_res(self): "p_val": p_val, } - log.info(f'FWHM linear fit: {self.fwhm_fit_linear["pars"].to_dict()}') + log.info(f'FWHM linear fit: {self.fwhm_fit_linear["parameters"].to_dict()}') log.info(f"FWHM fit values:") log.info(f"\t Energy | FWHM (keV) | Predicted (keV)") for i, (peak, fwhm, fwhme) in enumerate( From a71479da58176b010d615d4b8c0b5c2d211a6d42 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Oct 2023 17:54:16 +0200 Subject: [PATCH 012/191] partition ecal naming fix --- src/pygama/pargen/ecal_th.py | 26 ++++++++++---------------- src/pygama/pargen/energy_cal.py | 2 ++ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index ffbf37633..7aa72260c 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -172,6 +172,7 @@ def __init__( simplex: bool = True, deg: int = 1, cal_energy_param: str = None, + tail_weight=100, ): self.energy_param = energy_param if cal_energy_param is None: @@ -186,6 +187,7 @@ def __init__( self.deg = deg self.plot_options = plot_options self.simplex = simplex + self.tail_weight = tail_weight self.output_dict = {} self.hit_dict = {} @@ -474,6 +476,7 @@ def calibrate_parameter(self, data): n_events=self.n_events, allowed_p_val=self.p_val, simplex=self.simplex, + tail_weight=self.tail_weight, verbose=False, ) pk_pars = self.results["pk_pars"] @@ -494,19 +497,7 @@ def calibrate_parameter(self, data): found_peaks = np.array([]) fitted_peaks = np.array([]) fitted_funcs = np.array([]) - - if ( - len(fitted_peaks) != len(self.glines) - or self.gof_funcs[-1] == pgf.gauss_step_pdf - ): - if self.glines[-1] in fitted_peaks: - if fitted_funcs[-1] == pgf.extended_gauss_step_pdf: - self.funcs = [pgf.extended_gauss_step_pdf for entry in self.glines] - self.gof_funcs = [pgf.gauss_step_pdf for entry in self.glines] - - for i, peak in enumerate(self.glines): - if peak not in fitted_peaks: - kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) + if len(fitted_peaks) != len(self.glines): for i, peak in enumerate(self.glines): if peak not in fitted_peaks: kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) @@ -518,13 +509,12 @@ def calibrate_parameter(self, data): > 0.05 ): index = np.where(self.glines == peak)[0][0] - kev_ranges[i] = ( + kev_ranges[index] = ( kev_ranges[index][0] - 5, kev_ranges[index][1] - 5, ) except: pass - try: self.pars, self.cov, self.results = cal.hpge_E_calibration( data.query(self.selection_string)[self.energy_param], @@ -537,6 +527,7 @@ def calibrate_parameter(self, data): n_events=self.n_events, allowed_p_val=self.p_val, simplex=self.simplex, + tail_weight=self.tail_weight, verbose=False, ) fitted_peaks = self.results["fitted_keV"] @@ -934,6 +925,7 @@ def 
plot_fits( (fval - count) / count if count != 0 else (fval - count) for count, fval in zip(counts, fit_vals) ], + where="mid", ) plt.annotate( @@ -1384,6 +1376,7 @@ def energy_cal_th( final_cut_field: str = "is_valid_cal", simplex: bool = True, guess_keV: float | None = None, + tail_weight=100, deg: int = 1, ) -> tuple(dict, dict, dict, dict): data = load_data( @@ -1409,6 +1402,7 @@ def energy_cal_th( n_events, simplex, deg, + tail_weight=tail_weight, ) full_object_dict[energy_param].calibrate_parameter(data) results_dict[ @@ -1461,7 +1455,7 @@ def partition_energy_cal_th( results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( data ) - if results_dict[energy_param].results: + if full_object_dict[energy_param].results: plot_dict[energy_param] = ( full_object_dict[energy_param].fill_plot_dict(data).copy() ) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 293d8f30d..e7fdb21d8 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -929,6 +929,7 @@ def hpge_E_calibration( n_events=None, simplex=False, allowed_p_val=0.05, + tail_weight=100, verbose=True, ): """Calibrate HPGe data to a set of known peaks @@ -1139,6 +1140,7 @@ def hpge_E_calibration( uncal_is_int=False, simplex=simplex, allowed_p_val=allowed_p_val, + tail_weight=tail_weight, ) results["pk_pars"] = pk_pars results["pk_errors"] = pk_errors From 9cd6853649ceed0852075e38ce6de3bfa3bfa9e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 9 Oct 2023 13:08:57 +0200 Subject: [PATCH 013/191] updated units in fwhm to convention --- src/pygama/pargen/AoE_cal.py | 71 ++++++++++++++++++------------------ src/pygama/pargen/ecal_th.py | 16 ++++---- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 2125f80c3..55fec4ee3 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1547,8 +1547,8 @@ def drift_time_correction( m.hesse() self.dt_res_dict["dt_fit"] = { - "parameters": m.values, - "uncertainties": m.errors, + "pars": m.values, + "errs": m.errors, "object": m, } aoe_grp1 = self.dt_res_dict[ @@ -1801,8 +1801,8 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): "func": self.mean_func.__name__, "module": self.mean_func.__module__, "expression": self.mean_func.string_func("x"), - "parameters": mu_pars.to_dict(), - "uncertainties": mu_errs.to_dict(), + "pars": mu_pars.to_dict(), + "errs": mu_errs.to_dict(), "p_val_mu": p_val_mu, "csqr_mu": (csqr_mu, dof_mu), } @@ -1811,8 +1811,8 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): "func": self.sigma_func.__name__, "module": self.sigma_func.__module__, "expression": self.sigma_func.string_func("x"), - "parameters": sig_pars.to_dict(), - "uncertainties": sig_errs.to_dict(), + "pars": sig_pars.to_dict(), + "errs": sig_errs.to_dict(), "p_val_mu": p_val_sig, "csqr_mu": (csqr_sig, dof_sig), } @@ -1820,8 +1820,8 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): self.energy_corr_res_dict["dep_fit"] = { "func": self.pdf.__name__, "module": self.pdf.__module__, - "parameters": dep_pars.to_dict(), - "uncertainties": dep_err.to_dict(), + "pars": dep_pars.to_dict(), + "errs": dep_err.to_dict(), } self.update_cal_dicts( @@ -1831,7 +1831,7 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): "parameters": mu_pars.to_dict(), }, "AoE_Classifier": { - "expression": 
f"AoE_Corrected/({self.sigma_func.string_func(self.cal_energy_param)})", + "expression": f"(AoE_Corrected-1)/({self.sigma_func.string_func(self.cal_energy_param)})", "parameters": sig_pars.to_dict(), }, } @@ -1918,8 +1918,8 @@ def get_aoe_cut_fit( p = sigmoid_fit.func(xs, *m1.values) self.cut_fit = { "function": sigmoid_fit.__name__, - "parameters": m1.values.to_dict(), - "uncertainties": m1.errors.to_dict(), + "pars": m1.values.to_dict(), + "errs": m1.errors.to_dict(), } self.low_cut_val = round(xs[np.argmin(np.abs(p - (100 * self.dep_acc)))], 3) log.info(f"Cut found at {self.low_cut_val}") @@ -2238,12 +2238,12 @@ def drifttime_corr_plot( final_df = dep_events.query(aoe_class.dt_res_dict["final_selection"]) plt.subplot(2, 2, 1) - aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["parameters"] + aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["pars"] xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 100) counts, aoe_bins, bars = plt.hist( final_df.query( - f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}' + f'{aoe_class.dt_res_dict["aoe_grp1"]}&({aoe_param}<{aoe_pars["upper_range"]})&({aoe_param}>{aoe_pars["lower_range"]})' )[aoe_param], bins=400, histtype="step", @@ -2258,12 +2258,12 @@ def drifttime_corr_plot( plt.xlabel("A/E") plt.ylabel("counts") - aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["parameters"] + aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["pars"] plt.subplot(2, 2, 2) xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 100) counts, aoe_bins2, bars = plt.hist( final_df.query( - f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}' + f'{aoe_class.dt_res_dict["aoe_grp2"]}&({aoe_param}<{aoe_pars2["upper_range"]})&({aoe_param}>{aoe_pars2["lower_range"]})' )[aoe_param], bins=400, histtype="step", @@ -2300,8 +2300,7 @@ def drifttime_corr_plot( plt.plot( pgh.get_bin_centers(bins), drift_time_distribution.pdf( - pgh.get_bin_centers(bins), - *aoe_class.dt_res_dict["dt_fit"]["parameters"], + pgh.get_bin_centers(bins), *aoe_class.dt_res_dict["dt_fit"]["pars"] ) * np.diff(bins)[0], label="fit", @@ -2447,17 +2446,17 @@ def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: ) ax1.plot( - aoe_class.energy_corr_fits.index, + aoe_class.energy_corr_fits.index.to_numpy(), aoe_class.mean_func.func( - aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + aoe_class.energy_corr_fits.index.to_numpy(), + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"], ), label="linear model", ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainties"]["mu"], + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["mu"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]["mu"], label="DEP", color="green", linestyle=" ", @@ -2473,12 +2472,12 @@ def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: aoe_class.energy_corr_fits["mean"] - aoe_class.mean_func.func( aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"], ) ) / aoe_class.mean_func.func( aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"], ), lw=1, c="b", @@ -2487,13 +2486,13 @@ def plot_mean_fit(aoe_class, data, 
figsize=[12, 8], fontsize=12) -> plt.figure: 1592, 100 * ( - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"] + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["mu"] - aoe_class.mean_func.func( - 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"] + 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"] ) ) / aoe_class.mean_func.func( - 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"] + 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"] ), lw=1, c="g", @@ -2521,7 +2520,7 @@ def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: label="data", linestyle=" ", ) - sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["parameters"] + sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["pars"] if aoe_class.sigma_func == sigma_fit: label = f'sqrt model: \nsqrt({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.1f}/E)^{sig_pars["c"]:1.1f})' elif aoe_class.sigma_func == sigma_fit_quadratic: @@ -2529,14 +2528,16 @@ def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: else: raise ValueError("unknown sigma function") ax1.plot( - aoe_class.energy_corr_fits.index, - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), + aoe_class.energy_corr_fits.index.to_numpy(), + aoe_class.sigma_func.func( + aoe_class.energy_corr_fits.index.to_numpy(), **sig_pars + ), label=label, ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainies"]["sigma"], + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["sigma"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]["sigma"], label="DEP", color="green", linestyle=" ", @@ -2560,7 +2561,7 @@ def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: 1592, 100 * ( - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"] + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["sigma"] - aoe_class.sigma_func.func(1592, **sig_pars) ) / aoe_class.sigma_func.func(1592, **sig_pars), @@ -2589,9 +2590,9 @@ def plot_cut_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: ) plt.plot( - aoe_class.cut_fits.index, + aoe_class.cut_fits.index.to_numpy(), sigmoid_fit.func( - aoe_class.cut_fits.index.to_numpy(), **aoe_class.cut_fit["parameters"] + aoe_class.cut_fits.index.to_numpy(), **aoe_class.cut_fit["pars"] ), ) plt.hlines( diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 7aa72260c..138d9076f 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -257,8 +257,8 @@ def fit_energy_res(self): "function": fwhm_linear.__name__, "module": fwhm_linear.__module__, "expression": fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)": qbb_err, + "Qbb_fwhm_in_keV": fit_qbb, + "Qbb_fwhm_err_in_keV": qbb_err, "parameters": m_lin.values, "uncertainties": m_lin.errors, "cov": m_lin.covariance, @@ -291,8 +291,8 @@ def fit_energy_res(self): "function": fwhm_linear.__name__, "module": fwhm_linear.__module__, "expression": fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)": np.nan, + "Qbb_fwhm_in_keV": np.nan, + "Qbb_fwhm_err_in_keV": np.nan, "parameters": pars, "uncertainties": errs, "cov": cov, @@ -337,8 +337,8 @@ def fit_energy_res(self): "function": fwhm_quadratic.__name__, "module": fwhm_quadratic.__module__, "expression": fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)": qbb_err, + 
"Qbb_fwhm_in_keV": fit_qbb, + "Qbb_fwhm_err_in_keV": qbb_err, "parameters": m_quad.values, "uncertainties": m_quad.errors, "cov": m_quad.covariance, @@ -358,8 +358,8 @@ def fit_energy_res(self): "function": fwhm_quadratic.__name__, "module": fwhm_quadratic.__module__, "expression": fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)": np.nan, + "Qbb_fwhm_in_keV": np.nan, + "Qbb_fwhm_err_in_keV": np.nan, "parameters": pars, "uncertainties": errs, "cov": cov, From 5a623db57dfc6dcaa412bbeb01226436c72a4049 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 12 Oct 2023 22:48:59 +0200 Subject: [PATCH 014/191] corrected units to _in_keV --- src/pygama/pargen/ecal_th.py | 16 +++++++-------- src/pygama/pargen/utils.py | 39 ++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 138d9076f..e3f71d800 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -1230,8 +1230,8 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ), np.nanmax( [ - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], ] ), ] @@ -1242,7 +1242,7 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), lw=1, c="g", - label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV', + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV', ) ax1.plot( fwhm_slope_bins, @@ -1251,13 +1251,13 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ), lw=1, c="b", - label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV', + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err_in_keV"]:1.2f} keV', ) ax1.plot( qbb_line_hx, [ - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], ], lw=1, c="r", @@ -1266,8 +1266,8 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ax1.plot( qbb_line_hx, [ - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], ], lw=1, c="r", diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index e39dc255e..de9994523 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -114,10 +114,6 @@ def load_data( keys = [key.split("/")[-1] for key in keys] params = get_params(keys + list(df.keys()), params) - ids = tag_pulser(all_files, lh5_path) - df["is_not_pulser"] = ids[masks] - params.append("is_not_pulser") - for col in list(df.keys()): if col not in params: df.drop(col, inplace=True, axis=1) @@ -128,3 +124,38 @@ def load_data( df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") return df + + +def get_pulser_ids(tcm_file, channel, 
multiplicity_threshold): + if isinstance(channel, str): + if channel[:2] == "ch": + channel = int(channel[2:]) + else: + chan = int(channel) + else: + chan = channel + if isinstance(tcm_file, list): + mask = np.array([], dtype=bool) + for file in tcm_file: + _, file_mask = get_pulser_ids(file, chan, multiplicity_threshold) + mask = np.append(mask, file_mask) + ids = np.where(mask)[0] + else: + data = lh5.load_dfs(tcm_file, ["array_id", "array_idx"], "hardware_tcm_1") + cum_length = lh5.load_nda(tcm_file, ["cumulative_length"], "hardware_tcm_1")[ + "cumulative_length" + ] + cum_length = np.append(np.array([0]), cum_length) + n_channels = np.diff(cum_length) + evt_numbers = np.repeat(np.arange(0, len(cum_length) - 1), np.diff(cum_length)) + evt_mult = np.repeat(np.diff(cum_length), np.diff(cum_length)) + data["evt_number"] = evt_numbers + data["evt_mult"] = evt_mult + high_mult_events = np.where(n_channels > multiplicity_threshold)[0] + + ids = data.query(f"array_id=={channel} and evt_number in @high_mult_events")[ + "array_idx" + ].to_numpy() + mask = np.zeros(len(data.query(f"array_id==1104000")), dtype="bool") + mask[ids] = True + return ids, mask From 193fac5a97bfa9a2e3fcb9f6ec54552be0b7edf9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 12:54:32 +0100 Subject: [PATCH 015/191] moved aoe_calibration function to dataflow --- src/pygama/pargen/AoE_cal.py | 74 +----------------------------------- 1 file changed, 1 insertion(+), 73 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 55fec4ee3..0a5f7544b 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1297,7 +1297,7 @@ def __init__( cal_energy_param: str, eres_func: callable, pdf=standard_aoe, - selection_string: str = "is_valid_cal&is_not_pulser", + selection_string: str = "", dt_corr: bool = False, dep_acc: float = 0.9, dep_correct: bool = False, @@ -2814,75 +2814,3 @@ def plot_classifier( plt.ylim(yrange) plt.close() return fig - - -def aoe_calibration( - files, - lh5_path: str, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = standard_aoe, - cut_field: str = "is_valid_cal", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict = None, - high_cut_val: int = 3, - mean_func: Callable = pol1, - sigma_func: Callable = sigma_fit, - dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict = {}, - threshold: int = 800, -): - params = [ - current_param, - "tp_0_est", - "tp_99", - dt_param, - energy_param, - cal_energy_param, - cut_field, - ] - - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - f"{cut_field}&is_not_pulser", - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options, - ) - if dt_cut is not None: - params.append(dt_cut["out_param"]) - - data = load_data( - files, lh5_path, aoe.cal_dicts, params, cal_energy_param, threshold - ) - - data["AoE_Uncorr"] = np.divide(data[current_param], data[energy_param]) - - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - - aoe.calibrate(data, "AoE_Uncorr") - log.info(f"Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe From a7ceeab864b12d39bc0921f1f5545347c6290338 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 12:58:23 +0100 Subject: [PATCH 
016/191] moved top level funcs to dataflow added pulser field to plot arguments --- src/pygama/pargen/ecal_th.py | 147 +++++++++-------------------------- 1 file changed, 35 insertions(+), 112 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index e3f71d800..d048e6b24 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -48,9 +48,10 @@ def apply_cuts( hit_dict, cut_parameters=None, final_cut_field: str = "is_valid_cal", + pulser_field="is_pulser", ): if cut_parameters is not None: - cut_dict = cts.generate_cuts(data.query("is_not_pulser"), cut_parameters) + cut_dict = cts.generate_cuts(data.query(f"(~{pulser_field})"), cut_parameters) hit_dict.update( cts.cut_dict_to_hit_dict(cut_dict, final_cut_field=final_cut_field) ) @@ -61,7 +62,7 @@ def apply_cuts( else: data[final_cut_field] = np.ones(len(data), dtype=bool) - events_pqc = len(data.query(f"{final_cut_field}&is_not_pulser")) + events_pqc = len(data.query(f"{final_cut_field}&(~{pulser_field})")) log.debug(f"{events_pqc} events valid for calibration") return data, hit_dict @@ -1003,7 +1004,14 @@ def plot_2614_timemap( def plot_pulser_timemap( - ecal_class, data, figsize=[12, 8], fontsize=12, dx=0.2, time_dx=180, n_spread=3 + ecal_class, + data, + pulser_field="is_pulser", + figsize=[12, 8], + fontsize=12, + dx=0.2, + time_dx=180, + n_spread=3, ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -1014,7 +1022,7 @@ def plot_pulser_timemap( time_dx, ) - selection = data.query(f"~is_not_pulser") + selection = data.query(pulser_field) fig = plt.figure() if len(selection) == 0: pass @@ -1047,8 +1055,8 @@ def plot_pulser_timemap( return fig -def bin_pulser_stability(ecal_class, data, time_slice=180): - selection = data.query(f"~is_not_pulser") +def bin_pulser_stability(ecal_class, data, pulser_field="is_pulser", time_slice=180): + selection = data.query(pulser_field) utime_array = data["timestamp"] select_energies = selection[ecal_class.cal_energy_param].to_numpy() @@ -1332,7 +1340,14 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz return fig -def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): +def bin_spectrum( + ecal_class, + data, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=[0, 3000], + dx=2, +): bins = np.arange(erange[0], erange[1] + dx, dx) return { "bins": pgh.get_bin_centers(bins), @@ -1340,125 +1355,33 @@ def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], + data.querydata.query(f"(~{cut_field})&(~{pulser_field})")[ + ecal_class.cal_energy_param + ], bins, )[0], "pulser_counts": np.histogram( - data.query("~is_not_pulser")[ecal_class.cal_energy_param], + data.query(pulser_field)[ecal_class.cal_energy_param], bins, )[0], } -def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): +def bin_survival_fraction( + ecal_class, + data, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=[0, 3000], + dx=6, +): counts_pass, bins_pass, _ = pgh.get_hist( data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) counts_fail, bins_fail, _ = pgh.get_hist( - data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], + data.query(f"(~{cut_field})&(~{pulser_field})")[ecal_class.cal_energy_param], 
bins=np.arange(erange[0], erange[1] + dx, dx), ) sf = 100 * (counts_pass + 10 ** (-6)) / (counts_pass + counts_fail + 10 ** (-6)) return {"bins": pgh.get_bin_centers(bins_pass), "sf": sf} - - -def energy_cal_th( - files: list[str], - energy_params: list[str], - lh5_path: str = "dsp", - hit_dict: dict = {}, - cut_parameters: dict[str, int] = {"bl_mean": 4, "bl_std": 4, "pz_std": 4}, - plot_options: dict = None, - threshold: int = 0, - p_val: float = 0, - n_events: int = None, - final_cut_field: str = "is_valid_cal", - simplex: bool = True, - guess_keV: float | None = None, - tail_weight=100, - deg: int = 1, -) -> tuple(dict, dict, dict, dict): - data = load_data( - files, - lh5_path, - hit_dict, - params=energy_params + list(cut_parameters) + ["timestamp"], - ) - - data, hit_dict = apply_cuts(data, hit_dict, cut_parameters, final_cut_field) - - results_dict = {} - plot_dict = {} - full_object_dict = {} - for energy_param in energy_params: - full_object_dict[energy_param] = calibrate_parameter( - energy_param, - f"{final_cut_field}&is_not_pulser", - plot_options, - guess_keV, - threshold, - p_val, - n_events, - simplex, - deg, - tail_weight=tail_weight, - ) - full_object_dict[energy_param].calibrate_parameter(data) - results_dict[ - full_object_dict[energy_param].cal_energy_param - ] = full_object_dict[energy_param].get_results_dict(data) - hit_dict.update(full_object_dict[energy_param].hit_dict) - if ~np.isnan(full_object_dict[energy_param].pars).all(): - plot_dict[full_object_dict[energy_param].cal_energy_param] = ( - full_object_dict[energy_param].fill_plot_dict(data).copy() - ) - - log.info(f"Finished all calibrations") - return hit_dict, results_dict, plot_dict, full_object_dict - - -def partition_energy_cal_th( - files: list[str], - energy_params: list[str], - lh5_path: str = "dsp", - hit_dict: dict = {}, - plot_options: dict = None, - threshold: int = 0, - p_val: float = 0, - n_events: int = None, - final_cut_field: str = "is_valid_cal", - simplex: bool = True, - tail_weight: int = 20, -) -> tuple(dict, dict, dict, dict): - data = load_data( - files, - lh5_path, - hit_dict, - params=energy_params + [final_cut_field] + ["timestamp"], - ) - - results_dict = {} - plot_dict = {} - full_object_dict = {} - for energy_param in energy_params: - full_object_dict[energy_param] = high_stats_fitting( - energy_param, - f"{final_cut_field}&is_not_pulser", - threshold, - p_val, - plot_options, - simplex, - tail_weight, - ) - full_object_dict[energy_param].fit_peaks(data) - results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( - data - ) - if full_object_dict[energy_param].results: - plot_dict[energy_param] = ( - full_object_dict[energy_param].fill_plot_dict(data).copy() - ) - - log.info(f"Finished all calibrations") - return results_dict, plot_dict, full_object_dict From 118381736c11441b1e4b40b35ff1e20d9258c940 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 13:05:20 +0100 Subject: [PATCH 017/191] added option to pass pulser mask to event selection if not calculate itself --- src/pygama/pargen/energy_optimisation.py | 36 +++++++++++++----------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index b82b39afc..b5d59278b 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -915,6 +915,7 @@ def event_selection( peak_idxs, kev_widths, cut_parameters={"bl_mean": 4, "bl_std": 4, "pz_std": 4}, + pulser_mask=None, 
energy_parameter="trapTmax", wf_field: str = "waveform", n_events=10000, @@ -928,23 +929,26 @@ def event_selection( sto = lh5.LH5Store() df = lh5.load_dfs(raw_files, ["daqenergy", "timestamp"], lh5_path) - pulser_props = cts.find_pulser_properties(df, energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( - df.daqenergy.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = final_mask - log.debug(f"pulser found: {pulser_props}") + if pulser_mask is None: + pulser_props = cts.find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( + df.daqenergy.values > entry[0] - entry[1] + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no_pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) + # Get events around peak using raw file values else: - log.debug("no_pulser") - ids = np.zeros(len(df.daqenergy.values), dtype=bool) - # Get events around peak using raw file values + ids = pulser_mask initial_mask = (df.daqenergy.values > threshold) & (~ids) rough_energy = df.daqenergy.values[initial_mask] initial_idxs = np.where(initial_mask)[0] From 91e7b601bc94165140459daa24bba6c988ea73c6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 13:08:17 +0100 Subject: [PATCH 018/191] added pulser mask to load data, modified to have the data loading external to main function --- src/pygama/pargen/extract_tau.py | 40 +++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 2c096161e..11f76fa81 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -31,6 +31,7 @@ def load_data( raw_file: list[str], lh5_path: str, + pulser_mask=None, n_events: int = 10000, threshold: int = 5000, wf_field: str = "waveform", @@ -38,24 +39,27 @@ def load_data( sto = lh5.LH5Store() df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], lh5_path) - pulser_props = cts.find_pulser_properties(df, energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( - df.daqenergy.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") + if pulser_mask is None: + pulser_props = cts.find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( + df.daqenergy.values > entry[0] - entry[1] + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no_pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) else: - log.debug("no_pulser") - ids = np.ones(len(df.daqenergy.values), dtype=bool) + ids = pulser_mask - cuts = np.where((df.daqenergy.values > threshold) & (ids))[0] + cuts = np.where((df.daqenergy.values > threshold) & (~ids))[0] waveforms = sto.read_object( f"{lh5_path}/{wf_field}", raw_file, idx=cuts, 
n_rows=n_events @@ -216,9 +220,8 @@ def get_dpz_consts(grid_out, opt_dict): def dsp_preprocess_decay_const( - raw_files: list[str], + tb_data, dsp_config: dict, - lh5_path: str, double_pz: bool = False, display: int = 0, opt_dict: dict = None, @@ -245,7 +248,6 @@ def dsp_preprocess_decay_const( tau_dict : dict """ - tb_data = load_data(raw_files, lh5_path, wf_field=wf_field, threshold=threshold) tb_out = opt.run_one_dsp(tb_data, dsp_config) log.debug("Processed Data") cut_dict = cts.generate_cuts(tb_out, parameters=cut_parameters) From d3753aee6a58f98f92afbf226551d055d7856a1f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 13:19:26 +0100 Subject: [PATCH 019/191] switched fit escale and ecal to iminuit and add errors as outputs --- src/pygama/pargen/energy_cal.py | 46 +++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index e7fdb21d8..a82d13286 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -564,7 +564,7 @@ def staged_fit( m.values["htail"] < 0.01 or m.values["htail"] < 2 * m.errors["htail"] or np.isnan(m.values).any() - ): # or + ): # switch to stat test func_i = pgf.extended_gauss_step_pdf gof_func_i = pgf.gauss_step_pdf pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit( @@ -787,7 +787,6 @@ def hpge_fit_E_peaks( f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None elif valid_fit == False: log.debug( @@ -802,21 +801,18 @@ def hpge_fit_E_peaks( f"hpge_fit_E_peaks: failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None elif np.abs(total_events[0] - np.sum(hist)) / np.sum(hist) > 0.1: log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, total_events is outside limit" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None elif p_val < allowed_p_val or np.isnan(p_val): log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, p-value too low: {p_val}" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None else: valid_pks[i_peak] = True @@ -825,7 +821,7 @@ def hpge_fit_E_peaks( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" ) valid_pks[i_peak] = False - pars_i, errs_i, cov_i = return_nans(func_i) # None, None, None, None + pars_i, errs_i, cov_i = return_nans(func_i) p_val = 0 # get binning @@ -869,9 +865,20 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): scale, scale_cov = pgu.fit_simple_scaling(Es_keV, mus, var=mu_vars) pars = np.array([scale, 0]) cov = np.array([[scale_cov, 0], [0, 0]]) + errs = np.diag(np.sqrt(cov)) else: - pars, cov = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars), cov=True) - return pars, cov + poly_pars = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars)) + c = cost.LeastSquares( + Es_keV, mus, np.sqrt(mu_vars), lambda x, *pars: pgf.poly(x, pars) + ) + m = Minuit(c, *poly_pars) + m.simplex() + m.migrad() + m.hesse() + pars = m.values + cov = m.covariance + errs = m.errors + return pars, errs, cov def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): @@ -889,7 +896,8 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): Es_keV : array energies to fit to, in keV 
E_scale_pars : array - ??? + Parameters from the escale fit (keV to ADC) used for calculating + uncertainties deg : int degree for energy scale fit. deg=0 corresponds to a simple scaling mu = scale * E. Otherwise deg follows the definition in np.polyfit @@ -906,13 +914,24 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): scale, scale_cov = pgu.fit_simple_scaling(mus, Es_keV, var=E_vars) pars = np.array([scale, 0]) cov = np.array([[scale_cov, 0], [0, 0]]) + errs = np.diag(np.sqrt(cov)) else: dmudEs = np.zeros(len(mus)) for n in range(len(E_scale_pars) - 1): dmudEs += E_scale_pars[n] * mus ** (len(E_scale_pars) - 2 - n) E_weights = dmudEs * mu_vars - pars, cov = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights, cov=True) - return pars, cov + poly_pars = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights) + c = cost.LeastSquares( + mus, Es_keV, E_weights, lambda x, *pars: pgf.poly(x, pars) + ) + m = Minuit(c, *poly_pars) + m.simplex() + m.migrad() + m.hesse() + pars = m.values + errs = m.errors + cov = m.covariance + return pars, errs, cov def hpge_E_calibration( @@ -1188,15 +1207,16 @@ def hpge_E_calibration( mu_vars = np.asarray(mu_vars) ** 2 try: - pars, cov = hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=deg) + pars, errs, cov = hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=deg) results["pk_cal_pars"] = pars + results["pk_cal_errs"] = errs results["pk_cal_cov"] = cov except ValueError: log.error("Failed to fit enough peaks to get accurate calibration") return None, None, results # Invert the E scale fit to get a calibration function - pars, cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=deg) + pars, errs, cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=deg) # Finally, calculate fwhms in keV uncal_fwhms = [ From f6fea9eddfde684567f0957e39ceff25179c71ab Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 22:11:53 +0100 Subject: [PATCH 020/191] removed tag_pulser and cut import as had circular dependencies, renamed get_pulser_ids to get_tcm_pulser_ids, added ability to return mask for load_data --- src/pygama/pargen/utils.py | 39 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index de9994523..a33661fa5 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -8,8 +8,6 @@ import pandas as pd from iminuit import Minuit, cost, util -import pygama.pargen.cuts as cts - log = logging.getLogger(__name__) @@ -26,27 +24,6 @@ def return_nans(input): return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) -def tag_pulser(files, lh5_path): - pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) - pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (pulser_df.trapTmax.values < entry[0] + entry[1]) & ( - pulser_df.trapTmax.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") - else: - ids = np.ones(len(pulser_df), dtype=bool) - log.debug(f"no pulser found") - return ids - - def get_params(file_params, param_list): out_params = [] if isinstance(file_params, dict): @@ -67,6 +44,7 @@ def load_data( params=["cuspEmax"], cal_energy_param: str = "cuspEmax_ctc_cal", threshold=None, + return_selection_mask=False, ) -> 
tuple(np.array, np.array, np.array, np.array): """ Loads in the A/E parameters needed and applies calibration constants to energy @@ -84,8 +62,8 @@ def load_data( file_df = table.eval(cal_dict[tstamp]).get_dataframe() else: file_df = table.eval(cal_dict).get_dataframe() - file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) - params.append("timestamp") + file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) + params.append("run_timestamp") if threshold is not None: mask = file_df[cal_energy_param] < threshold @@ -123,13 +101,16 @@ def load_data( if param not in df: df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") - return df + if return_selection_mask: + return df, masks + else: + return df -def get_pulser_ids(tcm_file, channel, multiplicity_threshold): +def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold): if isinstance(channel, str): if channel[:2] == "ch": - channel = int(channel[2:]) + chan = int(channel[2:]) else: chan = int(channel) else: @@ -137,7 +118,7 @@ def get_pulser_ids(tcm_file, channel, multiplicity_threshold): if isinstance(tcm_file, list): mask = np.array([], dtype=bool) for file in tcm_file: - _, file_mask = get_pulser_ids(file, chan, multiplicity_threshold) + _, file_mask = get_tcm_pulser_ids(file, chan, multiplicity_threshold) mask = np.append(mask, file_mask) ids = np.where(mask)[0] else: From 54af5356219125860520ee2427aec7a1a7f1d7d9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 22:12:50 +0100 Subject: [PATCH 021/191] added default arguments, changed timestamp to run_timestamp to differentiate from normal timestamp --- src/pygama/pargen/AoE_cal.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 0a5f7544b..4db0cfbaa 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1293,9 +1293,9 @@ def compton_sf_sweep( class cal_aoe: def __init__( self, - cal_dicts: dict, - cal_energy_param: str, - eres_func: callable, + cal_dicts: dict = {}, + cal_energy_param: str = "cuspEmax_ctc_cal", + eres_func: callable = lambda x: 1, pdf=standard_aoe, selection_string: str = "", dt_corr: bool = False, @@ -1347,17 +1347,17 @@ def update_cal_dicts(self, update_dict): def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): log.info("Starting A/E time correction") self.timecorr_df = pd.DataFrame( - columns=["timestamp", "mean", "mean_err", "res", "res_err"] + columns=["run_timestamp", "mean", "mean_err", "res", "res_err"] ) try: - if "timestamp" in df: - tstamps = sorted(np.unique(df["timestamp"])) + if "run_timestamp" in df: + tstamps = sorted(np.unique(df["run_timestamp"])) means = [] errors = [] reses = [] res_errs = [] final_tstamps = [] - for tstamp, time_df in df.groupby("timestamp", sort=True): + for tstamp, time_df in df.groupby("run_timestamp", sort=True): try: pars, errs, cov = unbinned_aoe_fit( time_df.query( @@ -1372,7 +1372,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): pd.DataFrame( [ { - "timestamp": tstamp, + "run_timestamp": tstamp, "mean": pars["mu"], "mean_err": errs["mu"], "res": pars["sigma"] / pars["mu"], @@ -1393,7 +1393,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): pd.DataFrame( [ { - "timestamp": tstamp, + "run_timestamp": tstamp, "mean": np.nan, "mean_err": np.nan, "res": np.nan, @@ -1403,7 +1403,7 @@ def aoe_timecorr(self, df, 
aoe_param, output_name="AoE_Timecorr", display=0): ), ] ) - self.timecorr_df.set_index("timestamp", inplace=True) + self.timecorr_df.set_index("run_timestamp", inplace=True) time_dict = fit_time_means( np.array(self.timecorr_df.index), np.array(self.timecorr_df["mean"]), @@ -1411,7 +1411,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): ) df[output_name] = df[aoe_param] / np.array( - [time_dict[tstamp] for tstamp in df["timestamp"]] + [time_dict[tstamp] for tstamp in df["run_timestamp"]] ) self.update_cal_dicts( { From 978399008e24c7dce2301399fbc5da93330f6043 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 22:14:41 +0100 Subject: [PATCH 022/191] cleaned up imports, removing * imports and removing unnecessary arguments --- src/pygama/pargen/ecal_th.py | 7 ++++--- src/pygama/pargen/energy_cal.py | 16 ++++++++-------- src/pygama/pargen/extract_tau.py | 1 - 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index d048e6b24..7d21cd91d 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -20,6 +20,7 @@ import numpy as np import pandas as pd import scipy.stats +from iminuit import Minuit, cost from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit @@ -28,7 +29,7 @@ import pygama.math.peak_fitting as pgf import pygama.pargen.cuts as cts import pygama.pargen.energy_cal as cal -from pygama.pargen.utils import * +from pygama.pargen.utils import load_data, return_nans log = logging.getLogger(__name__) @@ -164,7 +165,7 @@ class calibrate_parameter: def __init__( self, energy_param, - selection_string="is_usable", + selection_string="", plot_options: dict = None, guess_keV: float | None = None, threshold: int = 0, @@ -1355,7 +1356,7 @@ def bin_spectrum( data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - data.querydata.query(f"(~{cut_field})&(~{pulser_field})")[ + data.query(f"(~{cut_field})&(~{pulser_field})")[ ecal_class.cal_energy_param ], bins, diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index a82d13286..a55df8a92 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -18,7 +18,7 @@ import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf import pygama.math.utils as pgu -from pygama.pargen.utils import * +from pygama.pargen.utils import return_nans log = logging.getLogger(__name__) @@ -838,6 +838,10 @@ def hpge_fit_E_peaks( return (pars, errors, covs, binws, ranges, p_vals, valid_pks, out_funcs) +def poly_wrapper(x, *pars): + return pgf.poly(x, pars) + + def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): """Find best fit of poly(E) = mus +/- sqrt(mu_vars) Compare to hpge_fit_E_cal_func which fits for E = poly(mu) @@ -868,9 +872,7 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): errs = np.diag(np.sqrt(cov)) else: poly_pars = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars)) - c = cost.LeastSquares( - Es_keV, mus, np.sqrt(mu_vars), lambda x, *pars: pgf.poly(x, pars) - ) + c = cost.LeastSquares(Es_keV, mus, np.sqrt(mu_vars), poly_wrapper) m = Minuit(c, *poly_pars) m.simplex() m.migrad() @@ -921,9 +923,7 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): dmudEs += E_scale_pars[n] * mus ** (len(E_scale_pars) - 2 - n) E_weights = dmudEs * mu_vars poly_pars = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights) - c = 
cost.LeastSquares( - mus, Es_keV, E_weights, lambda x, *pars: pgf.poly(x, pars) - ) + c = cost.LeastSquares(mus, Es_keV, E_weights, poly_wrapper) m = Minuit(c, *poly_pars) m.simplex() m.migrad() @@ -1213,7 +1213,7 @@ def hpge_E_calibration( results["pk_cal_cov"] = cov except ValueError: log.error("Failed to fit enough peaks to get accurate calibration") - return None, None, results + return None, None, None, results # Invert the E scale fit to get a calibration function pars, errs, cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=deg) diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 11f76fa81..72e357fd7 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -225,7 +225,6 @@ def dsp_preprocess_decay_const( double_pz: bool = False, display: int = 0, opt_dict: dict = None, - threshold: int = 5000, wf_field: str = "waveform", wf_plot: str = "wf_pz", norm_param: str = "pz_mean", From 894e60bc4e868ce7cab99eb0f6a869ed77a9e0c4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 2 Nov 2023 15:06:42 +0100 Subject: [PATCH 023/191] bugfix for tcm pulser where channel was incorrectly hardcoded --- src/pygama/pargen/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index a33661fa5..e6c9f3c75 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -137,6 +137,6 @@ def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold): ids = data.query(f"array_id=={channel} and evt_number in @high_mult_events")[ "array_idx" ].to_numpy() - mask = np.zeros(len(data.query(f"array_id==1104000")), dtype="bool") + mask = np.zeros(len(data.query(f"array_id=={channel}")), dtype="bool") mask[ids] = True return ids, mask From 62f8d3b87c46f65607e553f85dce6023655cdacc Mon Sep 17 00:00:00 2001 From: Jason Detwiler Date: Fri, 3 Nov 2023 02:53:33 -0700 Subject: [PATCH 024/191] Pre-compute `col_tiers` in `DataLoader.build_entry_list()` for speedup (#523) pre-compute col_tiers for speedup --- src/pygama/flow/data_loader.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index ab4ed5150..7490271b0 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -505,6 +505,8 @@ def build_entry_list( # Find out which columns are needed for any cuts cut_cols = {} + # ... 
and pre-load which tiers need to be loaded to make the cuts + col_tiers_dict = {} for level in [child, parent]: cut_cols[level] = [] @@ -527,6 +529,9 @@ def build_entry_list( and save_output_columns ): for_output.append(term) + col_tiers_dict[level] = self.get_tiers_for_col( + cut_cols[level], merge_files=False + ) if save_output_columns: entry_cols += for_output @@ -611,7 +616,7 @@ def build_entry_list( if level in self.cuts.keys(): cut = self.cuts[level] - col_tiers = self.get_tiers_for_col(cut_cols[level], merge_files=False) + col_tiers = col_tiers_dict[level] # Tables in first tier of event should be the same for all tiers in one level tables = self.filedb.df.loc[file, f"{self.tiers[level][0]}_tables"] From e96bef5384550290305c1bb99df48f32d84482bf Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 6 Nov 2023 19:40:50 +0100 Subject: [PATCH 025/191] Increase legend-pydataobj lower version bound --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index bc4dcb75e..58bc7bbef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ install_requires = h5py>=3.2 iminuit legend-daq2lh5>=1.0 - legend-pydataobj>=1.2 + legend-pydataobj>=1.3 matplotlib numba!=0.53.*,!=0.54.*,!=0.57 numpy>=1.21 From 15fae9893479a900bd9b3a001bb82caf484ff1e9 Mon Sep 17 00:00:00 2001 From: Erin Engelhardt <51338203+erin717@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:32:44 -0500 Subject: [PATCH 026/191] Added routines for calibrating LQ parameter Defines a general class that contains the different functions used during the calibration process, in a similar manner to AoE_cal.py. A few plotting functions are redundant with those found in AoE_cal.py. We should consider generalizing some of these plotting functions in the future for other PSD parameters. 
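
For orientation, a minimal usage sketch of the new class (illustrative only:
hit_dict and df are assumed to exist already, with df carrying the calibrated
energy, the LQ_Ecorr and dt_eff columns, and the flags referenced by the
default selection string; the energy-resolution function is a placeholder):

    import numpy as np
    from pygama.pargen.lq_cal import cal_lq

    lq = cal_lq(
        cal_dicts=hit_dict,  # existing hit-level operations from the energy calibration
        cal_energy_param="cuspEmax_ctc_cal",
        eres_func=lambda e: np.sqrt(1.5 + 0.002 * e),  # placeholder FWHM(E) curve in keV
    )
    lq.calibrate(df, "LQ_Ecorr")  # time correction, drift-time correction, cut value
    hit_dict = lq.cal_dicts  # now also carries the LQ_Classifier and LQ_Cut expressions

After calibrate() returns, the per-peak survival fractions are available as
lq.low_side_sf.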
--- src/pygama/pargen/lq_cal.py | 938 ++++++++++++++++++++++++++++++++++++ 1 file changed, 938 insertions(+) create mode 100644 src/pygama/pargen/lq_cal.py diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py new file mode 100644 index 000000000..77d91b8a8 --- /dev/null +++ b/src/pygama/pargen/lq_cal.py @@ -0,0 +1,938 @@ +from __future__ import annotations + +import json +import logging +import os +import pathlib +import re +from datetime import datetime +from typing import Callable + +import matplotlib as mpl +mpl.use("agg") + +import matplotlib.cm as cmx +import matplotlib.colors as mcolors +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import numpy as np +import pandas as pd +from iminuit import Minuit, cost, util +from scipy.stats import linregress +from matplotlib.backends.backend_pdf import PdfPages +from matplotlib.colors import LogNorm + +import lgdo.lh5_store as lh5 +import pygama.math.histogram as pgh +import pygama.math.peak_fitting as pgf +import pygama.pargen.AoE_cal as aoe +from pygama.pargen.utils import * + +log = logging.getLogger(__name__) + + +def get_fit_range(lq: np.array) -> tuple(float, float): + + """ + Function for determining the fit range for a given distribution of lq values + """ + + # Get an initial guess of mu and sigma, use these values to determine our final fit range + left_guess = np.nanpercentile(lq, 1) + right_guess = np.nanpercentile(lq, 95) + test_range = (left_guess, right_guess) + + hist, bins, _ = pgh.get_hist(lq, bins = 100, range = test_range) + bin_centers = (bins[:-1] + bins[1:]) / 2 + mu = bin_centers[np.argmax(hist)] + _, sigma, _ = pgh.get_gaussian_guess(hist, bins) + + left_edge = mu - 2.5*sigma + right_edge = mu + 2.5*sigma + fit_range = (left_edge, right_edge) + + return fit_range + +def get_lq_hist( + df: pd.DataFrame(), + lq_param: str, + cal_energy_param: str, + peak: float, + sidebands: bool = True +): + + """ + Function for getting a distribution of LQ values for a given peak. Returns a histogram of the + LQ distribution as well as an array of bin edges + """ + + + if sidebands: + # Get a histogram of events in the peak using sideband subtraction + # Uses a 6 keV window, and the sideband is to the right of the peak + # Default option + + peak_window = (df[cal_energy_param] < peak + 3) & (df[cal_energy_param] > peak - 3) + sideband_window = (df[cal_energy_param] < peak + 18) & (df[cal_energy_param] > peak + 12) + + fit_range = get_fit_range(df[lq_param][peak_window]) + + sideband_hist, bins, _ = pgh.get_hist(df[lq_param][sideband_window], bins = 100, range = fit_range) + dep_hist, _, _ = pgh.get_hist(df[lq_param][peak_window], bins = 100, range = fit_range) + final_hist = dep_hist - sideband_hist + var = np.sqrt(np.add(sideband_hist, dep_hist)) + + return final_hist, bins, var + + else: + # Return a histogram in a 5 keV range surrounding the specified peak + # Only use if peak statistics are low + + peak_window = (df[cal_energy_param] < peak + 2.5) & (df[cal_energy_param] > peak - 2.5) + fit_range = get_fit_range(df[lq_param][peak_window]) + dep_hist, bins, var = pgh.get_hist(df[lq_param][peak_window], bins = 100, range = fit_range) + + return dep_hist, bins, var + +def binned_lq_fit( + df: pd.DataFrame, + lq_param: str, + cal_energy_param: str, + peak: float, + cdf = pgf.gauss_cdf, + sidebands: bool = True +): + + """ Function for fitting a distribution of LQ values within a specified + energy peak. 
Fits a gaussian to the distribution
+
+    Parameters
+    ----------
+    df: pd.DataFrame()
+        Dataframe containing the data for fitting. Data must
+        contain the desired lq parameter and the calibrated
+        energy
+    lq_param: string
+        Name of the LQ parameter to fit
+    cal_energy_param: string
+        Name of the calibrated energy parameter of choice
+    peak: float
+        Energy value, in keV, of the peak whose LQ
+        distribution will be fit
+    cdf: callable
+        Function to be used for the binned fit
+    sidebands: bool
+        Whether or not to perform a sideband subtraction when
+        fitting the LQ distribution
+
+    Returns
+    -------
+    m1.values: array-like object
+        Resulting parameter values from the peak fit
+    m1.errors: array-like object
+        Resulting parameter errors from the peak fit
+    hist: array
+        Histogram that was used for the binned fit
+    bins: array
+        Array of bin edges used for the binned fit
+    """
+
+    hist, bins, var = get_lq_hist(df, lq_param, cal_energy_param, peak, sidebands)
+
+    # Temporary fix for negative bin counts
+    # TODO: Adjust fitting to handle negative bin counts
+    hist[hist < 0] = 0
+
+    bin_centers = (bins[:-1] + bins[1:]) / 2
+
+    mu = bin_centers[np.argmax(hist)]
+    _, sigma, _ = pgh.get_gaussian_guess(hist, bins)
+
+    # use the cdf passed by the caller for the binned fit
+    c1 = cost.BinnedNLL(hist, bins, cdf, verbose = 0)
+    m1 = Minuit(c1, mu, sigma)
+    m1.simplex().migrad()
+    m1.hesse()
+
+    return m1.values, m1.errors, hist, bins
+
+def fit_time_means(tstamps, means, reses):
+    out_dict = {}
+    current_tstamps = []
+    current_means = []
+    current_reses = []
+
+    # Temporary fix
+    # TODO: Create better method of measuring time stability
+    rolling_mean = means[np.where(~np.isnan(means))[0][0]]
+    # rolling_mean = means[
+    #     np.where(
+    #         (np.abs(np.diff(means)) < (0.4 * np.array(reses)[1:]))
+    #         & (~np.isnan(np.abs(np.diff(means)) < (0.4 * np.array(reses)[1:])))
+    #     )[0][0]
+    # ]
+    for i, tstamp in enumerate(tstamps):
+        if (
+            (
+                (np.abs(means[i] - rolling_mean) > 0.4 * reses[i])
+                and (np.abs(means[i] - rolling_mean) > rolling_mean * 0.5)
+            )
+            or np.isnan(means[i])
+            or np.isnan(reses[i])
+        ):
+            if i + 1 == len(means):
+                out_dict[tstamp] = np.nan
+            else:
+                if (np.abs(means[i + 1] - means[i]) < 0.4 * reses[i + 1]) and not (
+                    np.isnan(means[i])
+                    or np.isnan(means[i + 1])
+                    or np.isnan(reses[i])
+                    or np.isnan(reses[i + 1])
+                ):
+                    for ts in current_tstamps:
+                        out_dict[ts] = rolling_mean
+                    rolling_mean = means[i]
+                    current_means = [means[i]]
+                    current_tstamps = [tstamp]
+                    current_reses = [reses[i]]
+                else:
+                    out_dict[tstamp] = np.nan
+        else:
+            current_tstamps.append(tstamp)
+            current_means.append(means[i])
+            current_reses.append(reses[i])
+            rolling_mean = np.average(
+                current_means, weights=1 / np.array(current_reses)
+            )
+    for tstamp in current_tstamps:
+        out_dict[tstamp] = rolling_mean
+    return out_dict
+
+
+class cal_lq:
+
+    """ A class for calibrating the LQ parameter and determining the LQ cut value """
+
+    def __init__(
+        self,
+        cal_dicts: dict,
+        cal_energy_param: str,
+        eres_func: callable,
+        cdf: callable = pgf.gauss_cdf,
+        selection_string: str = 'is_valid_cal&is_not_pulser',
+        plot_options: dict = {},
+    ):
+
+        """
+        Parameters
+        ----------
+        cal_dicts: dict
+            A dictionary containing the hit-level operations to apply
+            to the data. 
+        cal_energy_param: string
+            The calibrated energy parameter of choice
+        eres_func: callable
+            The energy resolution function
+        cdf: callable
+            The CDF used for the binned fits
+        selection_string: string
+            A string of flags to apply to the data when running the calibration
+        plot_options: dict
+            A dict containing the plot functions the user wants to run,
+            and any user options to provide those plot functions
+        """
+
+        self.cal_dicts = cal_dicts
+        self.cal_energy_param = cal_energy_param
+        self.eres_func = eres_func
+        self.cdf = cdf
+        self.selection_string = selection_string
+        self.plot_options = plot_options
+
+    def update_cal_dicts(self, update_dict):
+
+        if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]):
+            for tstamp in self.cal_dicts:
+                if tstamp in update_dict:
+                    self.cal_dicts[tstamp].update(update_dict[tstamp])
+                else:
+                    self.cal_dicts[tstamp].update(update_dict)
+        else:
+            self.cal_dicts.update(update_dict)
+
+    def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0):
+
+        """
+        Calculates the average LQ value for DEP events for each specified run
+        timestamp. Applies a time normalization based on the average LQ value
+        in the DEP across all timestamps.
+        """
+
+        log.info("Starting LQ time correction")
+        self.timecorr_df = pd.DataFrame(
+            columns=["timestamp", "mean", "mean_err", "res", "res_err"]
+        )
+        try:
+            if "timestamp" in df:
+                tstamps = sorted(np.unique(df["timestamp"]))
+                means = []
+                errors = []
+                reses = []
+                res_errs = []
+                final_tstamps = []
+                for tstamp, time_df in df.groupby("timestamp", sort=True):
+                    try:
+                        pars, errs, _, _ = binned_lq_fit(
+                            time_df.query(f"{self.selection_string}"),
+                            lq_param,
+                            self.cal_energy_param,
+                            peak = 1592.5,
+                            cdf=self.cdf,
+                            sidebands = False,
+                        )
+                        self.timecorr_df = pd.concat(
+                            [
+                                self.timecorr_df,
+                                pd.DataFrame(
+                                    [
+                                        {
+                                            "timestamp": tstamp,
+                                            "mean": pars["mu"],
+                                            "mean_err": errs["mu"],
+                                            "res": pars["sigma"] / pars["mu"],
+                                            "res_err": (pars["sigma"] / pars["mu"])
+                                            * np.sqrt(
+                                                errs["sigma"] / pars["sigma"]
+                                                + errs["mu"] / pars["mu"]
+                                            ),
+                                        }
+                                    ]
+                                ),
+                            ]
+                        )
+                    except:
+                        self.timecorr_df = pd.concat(
+                            [
+                                self.timecorr_df,
+                                pd.DataFrame(
+                                    [
+                                        {
+                                            "timestamp": tstamp,
+                                            "mean": np.nan,
+                                            "mean_err": np.nan,
+                                            "res": np.nan,
+                                            "res_err": np.nan,
+                                        }
+                                    ]
+                                ),
+                            ]
+                        )
+                self.timecorr_df.set_index("timestamp", inplace=True)
+                time_dict = fit_time_means(
+                    np.array(self.timecorr_df.index),
+                    np.array(self.timecorr_df["mean"]),
+                    np.array(self.timecorr_df["res"]),
+                )
+
+                df[output_name] = df[lq_param] / np.array(
+                    [time_dict[tstamp] for tstamp in df["timestamp"]]
+                )
+                self.update_cal_dicts(
+                    {
+                        tstamp: {
+                            output_name: {
+                                "expression": f"{lq_param}/a",
+                                "parameters": {"a": t_dict},
+                            }
+                        }
+                        for tstamp, t_dict in time_dict.items()
+                    }
+                )
+                log.info("LQ time correction finished")
+            else:
+                try:
+                    pars, errs, _, _ = binned_lq_fit(
+                        df.query(f"{self.selection_string}"),
+                        lq_param,
+                        self.cal_energy_param,
+                        peak = 1592.5,
+                        cdf=self.cdf,
+                        sidebands = False,
+                    )
+                    self.timecorr_df = pd.concat(
+                        [
+                            self.timecorr_df,
+                            pd.DataFrame(
+                                [
+                                    {
+                                        "mean": pars["mu"],
+                                        "mean_err": errs["mu"],
+                                        "res": pars["sigma"] / pars["mu"],
+                                        "res_err": (pars["sigma"] / pars["mu"])
+                                        * np.sqrt(
+                                            errs["sigma"] / pars["sigma"]
+                                            + errs["mu"] / pars["mu"]
+                                        ),
+                                    }
+                                ]
+                            ),
+                        ]
+                    )
+                except:
+                    self.timecorr_df = pd.concat(
+                        [
+                            self.timecorr_df,
+                            pd.DataFrame(
+                                [
+                                    {
+                                        "mean": np.nan,
+                                        "mean_err": np.nan,
+                                        "res": np.nan,
+                                        "res_err": np.nan,
+                                    }
+                                ]
+                            ),
+                        ]
+                    )
+                df[output_name] = df[lq_param] / pars["mu"]
+                
self.update_cal_dicts(
+                    {
+                        output_name: {
+                            "expression": f"{lq_param}/a",
+                            "parameters": {"a": pars["mu"]},
+                        }
+                    }
+                )
+                log.info("LQ time correction finished")
+        except:
+            log.error("LQ time correction failed")
+            self.update_cal_dicts(
+                {
+                    output_name: {
+                        "expression": f"{lq_param}/a",
+                        "parameters": {"a": np.nan},
+                    }
+                }
+            )
+
+    def drift_time_correction(self, df: pd.DataFrame(), lq_param, cal_energy_param: str, display: int = 0):
+
+        """
+        Determines the drift time correction parameters for LQ by fitting a degree 1 polynomial to
+        the LQ vs drift time distribution for DEP events. Corrects for any linear dependence and
+        centers the final LQ distribution to a mean of 0.
+        """
+
+        log.info("Starting LQ drift time correction")
+        try:
+
+            dt_dict={}
+            pars = binned_lq_fit(df, lq_param, self.cal_energy_param, peak = 1592.5)[0]
+            mean = pars[0]
+            sigma = pars[1]
+
+            lq_mask = (df[lq_param] < (2*sigma + mean)) & (df[lq_param] > (mean - 2*sigma))
+            dep_mask = (df[cal_energy_param] < 1595) & (df[cal_energy_param] > 1590)
+
+            ids = np.isnan(df[lq_param]) | np.isnan(df['dt_eff'])
+            result = linregress(df['dt_eff'][~ids & dep_mask & lq_mask], df[lq_param][~ids & dep_mask & lq_mask], alternative = 'greater')
+            self.dt_fit_pars = result
+
+            df['LQ_Classifier'] = df[lq_param] - df['dt_eff']*self.dt_fit_pars[0] - self.dt_fit_pars[1]
+
+        except:
+            log.error("LQ drift time correction failed")
+            self.dt_fit_pars = (np.nan, np.nan)
+
+
+        self.update_cal_dicts(
+            {
+                'LQ_Classifier': {
+                    "expression": f"{lq_param} - dt_eff*a - b",
+                    "parameters": {"a": self.dt_fit_pars[0], "b": self.dt_fit_pars[1]}
+                }
+            }
+        )
+
+    def get_cut_lq_dep(self, df: pd.DataFrame(), lq_param: str, cal_energy_param: str):
+
+        """
+        Determines the cut value for LQ. Value is calculated by fitting the LQ distribution
+        for events in the DEP to a gaussian. The cut value is set at 3*sigma of the fit.
+        Sideband subtraction is used to determine the LQ distribution for DEP events.
+        Events greater than the cut value fail the cut.
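+
+        A minimal sketch of the intended call, with illustrative column names
+        (any calibrated energy column can stand in for 'cal_energy'):
+
+        >>> lq.get_cut_lq_dep(df, lq_param='LQ_Classifier',
+        ...                   cal_energy_param='cal_energy')
+        >>> df['LQ_Cut']  # True for events that pass the cut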
+ """ + + log.info("Starting LQ Cut calculation") + try: + pars, errs, hist, bins = binned_lq_fit(df, 'LQ_Classifier', cal_energy_param, peak = 1592.5) + cut_val = 3*pars[1] + + self.cut_fit_pars = pars + self.cut_fit_errs = errs + self.fit_hist = (hist, bins) + self.cut_val = cut_val + + df['LQ_Cut'] = df[lq_param] < self.cut_val + + except: + log.error("LQ cut determination failed") + self.cut_val = np.nan + + self.update_cal_dicts( + { + "LQ_Cut": { + "expression": f"({lq_param} < a)", + "parameters": {"a": self.cut_val}, + } + } + ) + + def get_results_dict(self): + return { + "cal_energy_param": self.cal_energy_param, + "rt_correction": self.dt_fit_pars, + "cdf": self.cdf.__name__, + "1590-1596keV": self.timecorr_df.to_dict("index"), + "cut_value": self.cut_val, + "sfs": self.low_side_sf.to_dict("index"), + } + + def fill_plot_dict(self, data, plot_dict={}): + for key, item in self.plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](self, data, **item["options"]) + else: + plot_dict[key] = item["function"](self, data) + return plot_dict + + def calibrate(self, df, initial_lq_param): + + """ Run the LQ calibration and calculate the cut value """ + + self.lq_timecorr(df, lq_param = 'LQ_Ecorr') + log.info('Finished LQ Time Correction') + + self.drift_time_correction(df, lq_param = 'LQ_Timecorr', cal_energy_param = self.cal_energy_param) + log.info('Finished LQ Drift Time Correction') + + self.get_cut_lq_dep(df, lq_param = 'LQ_Classifier', cal_energy_param = self.cal_energy_param) + log.info('Finished Calculating the LQ Cut Value') + + final_lq_param = 'LQ_Classifier' + peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] + self.low_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] + self.low_side_peak_dfs = {} + + log.info("Calculating peak survival fractions") + for i, peak in enumerate(peaks_of_interest): + try: + select_df = df.query( + f"{self.selection_string}" + ) + fwhm = self.eres_func(peak) + if peak == 2039: + emin = 2 * fwhm + emax = 2 * fwhm + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) + + cut_df, sf, sf_err = aoe.compton_sf_sweep( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[final_lq_param].to_numpy(), + self.cut_val, + peak, + fwhm, + cut_range = (0, 0.6), + mode = 'less', + ) + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] + ) + self.low_side_peak_dfs[peak] = cut_df + else: + emin, emax = fit_widths[i] + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) + cut_df, sf, sf_err = aoe.get_sf_sweep( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[final_lq_param].to_numpy(), + self.cut_val, + peak, + fwhm, + cut_range = (0, 0.6), + mode = 'less', + ) + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] + ) + self.low_side_peak_dfs[peak] = cut_df + log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") + except: + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]), + ] + ) + log.error( + f"LQ Survival fraction determination failed for {peak} peak" + ) + self.low_side_sf.set_index("peak", inplace=True) + + +def plot_lq_mean_time( + lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12 +) -> 
plt.figure: + + """ Plots the mean LQ value calculated for each given timestamp """ + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots(1, 1) + # try: + ax.errorbar( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in lq_class.timecorr_df.index + ], + lq_class.timecorr_df["mean"], + yerr=lq_class.timecorr_df["mean_err"], + linestyle=" ", + ) + + grouped_means = [ + cal_dict["LQ_Timecorr"]["parameters"]["a"] + for tstamp, cal_dict in lq_class.cal_dicts.items() + ] + ax.step( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in lq_class.cal_dicts + ], + grouped_means, + where="post", + ) + ax.fill_between( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in lq_class.cal_dicts + ], + y1=np.array(grouped_means) - 0.2 * np.array(lq_class.timecorr_df["res"]), + y2=np.array(grouped_means) + 0.2 * np.array(lq_class.timecorr_df["res"]), + color="green", + alpha=0.2, + ) + ax.fill_between( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in lq_class.cal_dicts + ], + y1=np.array(grouped_means) - 0.4 * np.array(lq_class.timecorr_df["res"]), + y2=np.array(grouped_means) + 0.4 * np.array(lq_class.timecorr_df["res"]), + color="yellow", + alpha=0.2, + ) + # except: + # pass + ax.set_xlabel("time") + ax.set_ylabel("LQ mean") + myFmt = mdates.DateFormatter("%b %d") + ax.xaxis.set_major_formatter(myFmt) + plt.close() + return fig + +def plot_drift_time_correction( + lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12 +) -> plt.figure: + + """ Plots a 2D histogram of LQ versus effective drift time in a 6 keV + window around the DEP. Additioanlly plots the fit results for the + drift time correction. """ + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots(1, 1) + + try: + + dep_range = (1590, 1595) + + initial_df = data[(data[lq_class.cal_energy_param] > dep_range[0]) & (data[lq_class.cal_energy_param] < dep_range[1])] + max_dt = 1500 + max_lq = 2.5 + + plt.hist2d(initial_df['dt_eff'], + initial_df[lq_param], + bins = 100, + range = ((0, max_dt), (0, max_lq)), norm = mcolors.LogNorm() + ) + + x = np.linspace(0, max_dt, 100) + model = lq_class.dt_fit_pars[0]*x + lq_class.dt_fit_pars[1] + + plt.plot(x, model, color = 'r') + + plt.xlabel('Drift Time (ns)') + plt.ylabel('LQ') + + plt.title('LQ versus Drift Time for DEP') + + except: + pass + + + plt.tight_layout() + plt.close() + return fig + +def plot_lq_cut_fit( + lq_class, data, figsize=[12,8], fontsize=12 +) -> plt.figure: + + """ Plots the final histogram of LQ values for events in the + DEP, and the fit results used for determining the cut + value """ + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, (ax1, ax2) = plt.subplots(2, 1, height_ratios = (2,1)) + + try: + + hist, bins = lq_class.fit_hist + fit_pars = lq_class.cut_fit_pars + + ax1.stairs(hist, bins, label = 'data') + xs = np.linspace(round(bins[0], 3), round(bins[-1], 3), len(bins)-1) + ls = np.sum(hist) + dx = np.diff(bins) + ax1.plot(xs, pgf.gauss_pdf(xs, fit_pars[0], fit_pars[1], ls)*dx, label = 'Gaussian Fit') + + #ax1.set_xlabel('LQ') + ax1.set_title('Fit of LQ events in DEP') + ax1.legend() + + bin_centers = (bins[:-1] + bins[1:]) / 2 + reses = (hist - (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls)*dx)) / (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls)*dx) + ax2.plot(bin_centers, reses, marker = 's', linestyle = '') + ax2.set_xlabel('LQ') + 
ax2.set_ylabel('residuals') + + except: + pass + + plt.tight_layout() + plt.close() + return fig + +def plot_survival_fraction_curves( + lq_class, data, figsize=[12, 8], fontsize=12 +) -> plt.figure: + + """ Plots the survival fraction curves as a function of + LQ cut values for every peak of interest """ + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + plt.vlines( + lq_class.cut_val, + 0, + 100, + label=f"cut value: {lq_class.cut_val:1.2f}", + color="black", + ) + + for peak, survival_df in lq_class.low_side_peak_dfs.items(): + try: + plt.errorbar( + survival_df.index, + survival_df["sf"], + yerr=survival_df["sf_err"], + label=f'{aoe.get_peak_label(peak)} {peak} keV: {lq_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {lq_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %', + ) + except: + pass + except: + pass + vals, labels = plt.yticks() + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) + plt.legend(loc="lower right") + plt.xlabel("cut value") + plt.ylabel("survival percentage") + plt.ylim([0, 105]) + plt.close() + return fig + + +def plot_sf_vs_energy( + lq_class, data, xrange=(900, 3000), n_bins=701, figsize=[12, 8], fontsize=12 +) -> plt.figure: + + """ Plots the survival fraction as a function of energy """ + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + bins = np.linspace(xrange[0], xrange[1], n_bins) + counts_pass, bins_pass, _ = pgh.get_hist( + data.query(f"{lq_class.selection_string}&LQ_Cut")[ + lq_class.cal_energy_param + ], + bins=bins, + ) + counts, bins, _ = pgh.get_hist( + data.query(lq_class.selection_string)[lq_class.cal_energy_param], + bins=bins, + ) + survival_fracs = counts_pass / (counts + 10**-99) + + plt.step(pgh.get_bin_centers(bins_pass), 100 * survival_fracs) + except: + pass + plt.ylim([0, 100]) + vals, labels = plt.yticks() + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) + plt.xlabel("energy (keV)") + plt.ylabel("survival percentage") + plt.close() + return fig + +def plot_spectra( + lq_class, + data, + xrange=(900, 3000), + n_bins=2101, + xrange_inset=(1580, 1640), + n_bins_inset=200, + figsize=[12, 8], + fontsize=12, +) -> plt.figure: + + """ Plots a 2D histogram of the LQ classifier vs calibrated energy """ + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig, ax = plt.subplots() + try: + bins = np.linspace(xrange[0], xrange[1], n_bins) + ax.hist( + data.query(lq_class.selection_string)[lq_class.cal_energy_param], + bins=bins, + histtype="step", + label="before PSD", + ) + # ax.hist( + # data.query(f"{lq_class.selection_string}&AoE_Double_Sided_Cut")[ + # lq_class.cal_energy_param + # ], + # bins=bins, + # histtype="step", + # label="after double sided A/E cut", + # ) + ax.hist( + data.query(f"{lq_class.selection_string}&LQ_Cut")[ + lq_class.cal_energy_param + ], + bins=bins, + histtype="step", + label="after LQ cut", + ) + ax.hist( + data.query(f"{lq_class.selection_string} & (~LQ_Cut)")[ + lq_class.cal_energy_param + ], + bins=bins, + histtype="step", + label="rejected by LQ cut", + ) + + axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) + bins = np.linspace(xrange_inset[0], xrange_inset[1], n_bins_inset) + select_df = data.query( + f"{lq_class.cal_energy_param}<{xrange_inset[1]}&{lq_class.cal_energy_param}>{xrange_inset[0]}" + ) + axins.hist( + select_df.query(lq_class.selection_string)[lq_class.cal_energy_param], + bins=bins, + histtype="step", + ) + # axins.hist( + # 
select_df.query(f"{lq_class.selection_string}&AoE_Double_Sided_Cut")[ + # lq_class.cal_energy_param + # ], + # bins=bins, + # histtype="step", + # ) + axins.hist( + select_df.query(f"{lq_class.selection_string}&LQ_Cut")[ + lq_class.cal_energy_param + ], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{lq_class.selection_string} & (~LQ_Cut)")[ + lq_class.cal_energy_param + ], + bins=bins, + histtype="step", + ) + except: + pass + ax.set_xlim(xrange) + ax.set_yscale("log") + plt.xlabel("energy (keV)") + plt.ylabel("counts") + plt.legend(loc="upper left") + plt.close() + return fig + + +def plot_classifier( + lq_class, + data, + lq_param="LQ_Classifier", + xrange=(800, 3000), + yrange=(-2, 8), + xn_bins=700, + yn_bins=500, + figsize=[12, 8], + fontsize=12, +) -> plt.figure: + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + plt.hist2d( + data.query(lq_class.selection_string)[lq_class.cal_energy_param], + data.query(lq_class.selection_string)[lq_param], + bins=[ + np.linspace(xrange[0], xrange[1], xn_bins), + np.linspace(yrange[0], yrange[1], yn_bins), + ], + norm=LogNorm(), + ) + except: + pass + plt.xlabel("energy (keV)") + plt.ylabel(lq_param) + plt.xlim(xrange) + plt.ylim(yrange) + plt.close() + return fig \ No newline at end of file From 249658c244d6c1253eaf09370835030da36887f8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Nov 2023 15:47:38 +0000 Subject: [PATCH 027/191] style: pre-commit fixes --- src/pygama/pargen/lq_cal.py | 377 +++++++++++++++++++----------------- 1 file changed, 194 insertions(+), 183 deletions(-) diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py index 77d91b8a8..f4ed1e492 100644 --- a/src/pygama/pargen/lq_cal.py +++ b/src/pygama/pargen/lq_cal.py @@ -9,20 +9,21 @@ from typing import Callable import matplotlib as mpl + mpl.use("agg") +import lgdo.lh5_store as lh5 import matplotlib.cm as cmx import matplotlib.colors as mcolors -import matplotlib.pyplot as plt import matplotlib.dates as mdates +import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util -from scipy.stats import linregress from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm +from scipy.stats import linregress -import lgdo.lh5_store as lh5 import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf import pygama.pargen.AoE_cal as aoe @@ -32,80 +33,90 @@ def get_fit_range(lq: np.array) -> tuple(float, float): - """ Function for determining the fit range for a given distribution of lq values """ - + # Get an initial guess of mu and sigma, use these values to determine our final fit range left_guess = np.nanpercentile(lq, 1) right_guess = np.nanpercentile(lq, 95) test_range = (left_guess, right_guess) - hist, bins, _ = pgh.get_hist(lq, bins = 100, range = test_range) + hist, bins, _ = pgh.get_hist(lq, bins=100, range=test_range) bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] _, sigma, _ = pgh.get_gaussian_guess(hist, bins) - left_edge = mu - 2.5*sigma - right_edge = mu + 2.5*sigma + left_edge = mu - 2.5 * sigma + right_edge = mu + 2.5 * sigma fit_range = (left_edge, right_edge) - + return fit_range + def get_lq_hist( - df: pd.DataFrame(), - lq_param: str, - cal_energy_param: str, - peak: float, - sidebands: bool = True + df: pd.DataFrame(), + lq_param: str, + cal_energy_param: str, + peak: float, + 
sidebands: bool = True, ): - """ - Function for getting a distribution of LQ values for a given peak. Returns a histogram of the + Function for getting a distribution of LQ values for a given peak. Returns a histogram of the LQ distribution as well as an array of bin edges """ - if sidebands: # Get a histogram of events in the peak using sideband subtraction # Uses a 6 keV window, and the sideband is to the right of the peak # Default option - peak_window = (df[cal_energy_param] < peak + 3) & (df[cal_energy_param] > peak - 3) - sideband_window = (df[cal_energy_param] < peak + 18) & (df[cal_energy_param] > peak + 12) - + peak_window = (df[cal_energy_param] < peak + 3) & ( + df[cal_energy_param] > peak - 3 + ) + sideband_window = (df[cal_energy_param] < peak + 18) & ( + df[cal_energy_param] > peak + 12 + ) + fit_range = get_fit_range(df[lq_param][peak_window]) - sideband_hist, bins, _ = pgh.get_hist(df[lq_param][sideband_window], bins = 100, range = fit_range) - dep_hist, _, _ = pgh.get_hist(df[lq_param][peak_window], bins = 100, range = fit_range) + sideband_hist, bins, _ = pgh.get_hist( + df[lq_param][sideband_window], bins=100, range=fit_range + ) + dep_hist, _, _ = pgh.get_hist( + df[lq_param][peak_window], bins=100, range=fit_range + ) final_hist = dep_hist - sideband_hist var = np.sqrt(np.add(sideband_hist, dep_hist)) return final_hist, bins, var - + else: # Return a histogram in a 5 keV range surrounding the specified peak # Only use if peak statistics are low - peak_window = (df[cal_energy_param] < peak + 2.5) & (df[cal_energy_param] > peak - 2.5) + peak_window = (df[cal_energy_param] < peak + 2.5) & ( + df[cal_energy_param] > peak - 2.5 + ) fit_range = get_fit_range(df[lq_param][peak_window]) - dep_hist, bins, var = pgh.get_hist(df[lq_param][peak_window], bins = 100, range = fit_range) + dep_hist, bins, var = pgh.get_hist( + df[lq_param][peak_window], bins=100, range=fit_range + ) return dep_hist, bins, var + def binned_lq_fit( - df: pd.DataFrame, - lq_param: str, - cal_energy_param: str, - peak: float, - cdf = pgf.gauss_cdf, - sidebands: bool = True -): - - """ Function for fitting a distribution of LQ values within a specified + df: pd.DataFrame, + lq_param: str, + cal_energy_param: str, + peak: float, + cdf=pgf.gauss_cdf, + sidebands: bool = True, +): + """Function for fitting a distribution of LQ values within a specified energy peak. 
Fits a gaussian to the distribution - + Parameters ---------- df: pd.DataFrame() @@ -124,7 +135,7 @@ def binned_lq_fit( sidebands: bool Whether or not to perform a sideband subtraction when fitting the LQ distribution - + Returns ------- m1.values: array-like object @@ -136,31 +147,32 @@ def binned_lq_fit( bins: array Array of bin edges used for the binned fit """ - + hist, bins, var = get_lq_hist(df, lq_param, cal_energy_param, peak, sidebands) - + # Temporary fix for negative bin counts # TODO: Adjust fitting to handle negative bin counts hist[hist < 0] = 0 - + bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] _, sigma, _ = pgh.get_gaussian_guess(hist, bins) - c1 = cost.BinnedNLL(hist, bins, pgf.gauss_cdf, verbose = 0) + c1 = cost.BinnedNLL(hist, bins, pgf.gauss_cdf, verbose=0) m1 = Minuit(c1, mu, sigma) m1.simplex().migrad() m1.hesse() return m1.values, m1.errors, hist, bins + def fit_time_means(tstamps, means, reses): out_dict = {} current_tstamps = [] current_means = [] current_reses = [] - + # Temporary fix # TODO: Create better method of measuring time stability rolling_mean = means[np.where(~np.isnan(means))[0][0]] @@ -209,19 +221,18 @@ def fit_time_means(tstamps, means, reses): class cal_lq: - - """ A class for calibrating the LQ parameter and determining the LQ cut value """ - + + """A class for calibrating the LQ parameter and determining the LQ cut value""" + def __init__( - self, - cal_dicts: dict, - cal_energy_param: str, - eres_func: callable, - cdf: callable = pgf.gauss_cdf, - selection_string: str = 'is_valid_cal&is_not_pulser', - plot_options: dict = {}, + self, + cal_dicts: dict, + cal_energy_param: str, + eres_func: callable, + cdf: callable = pgf.gauss_cdf, + selection_string: str = "is_valid_cal&is_not_pulser", + plot_options: dict = {}, ): - """ Parameters ---------- @@ -240,7 +251,7 @@ def __init__( A dict containing the plot functions the user wants to run, and any user options to provide those plot functions """ - + self.cal_dicts = cal_dicts self.cal_energy_param = cal_energy_param self.eres_func = eres_func @@ -249,7 +260,6 @@ def __init__( self.plot_options = plot_options def update_cal_dicts(self, update_dict): - if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]): for tstamp in self.cal_dicts: if tstamp in update_dict: @@ -260,13 +270,12 @@ def update_cal_dicts(self, update_dict): self.cal_dicts.update(update_dict) def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): - """ - Calculates the average LQ value for DEP events for each specified run - timestamp. Applies a time normalization based on the average LQ value + Calculates the average LQ value for DEP events for each specified run + timestamp. Applies a time normalization based on the average LQ value in the DEP accross all timestamps. 
""" - + log.info("Starting LQ time correction") self.timecorr_df = pd.DataFrame( columns=["timestamp", "mean", "mean_err", "res", "res_err"] @@ -285,9 +294,9 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): time_df.query(f"{self.selection_string}"), lq_param, self.cal_energy_param, - peak = 1592.5, + peak=1592.5, cdf=self.cdf, - sidebands = False, + sidebands=False, ) self.timecorr_df = pd.concat( [ @@ -354,9 +363,9 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): df.query(f"{self.selection_string}"), lq_param, self.cal_energy_param, - peak = 1592.5, + peak=1592.5, cdf=self.cdf, - sidebands = False, + sidebands=False, ) self.timecorr_df = pd.concat( [ @@ -414,9 +423,10 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): } ) - def drift_time_correction(self, df: pd.DataFrame(), lq_param, cal_energy_param: str, display: int = 0): - - """ + def drift_time_correction( + self, df: pd.DataFrame(), lq_param, cal_energy_param: str, display: int = 0 + ): + """ Deterimines the drift time correction parameters for LQ by fitting a degree 1 polynomial to the LQ vs drift time distribution for DEP events. Corrects for any linear dependence and centers the final LQ distribution to a mean of 0. @@ -424,60 +434,67 @@ def drift_time_correction(self, df: pd.DataFrame(), lq_param, cal_energy_param: log.info("Starting LQ drift time correction") try: - - dt_dict={} - pars = binned_lq_fit(df, lq_param, self.cal_energy_param, peak = 1592.5)[0] + dt_dict = {} + pars = binned_lq_fit(df, lq_param, self.cal_energy_param, peak=1592.5)[0] mean = pars[0] sigma = pars[1] - lq_mask = (df[lq_param] < (2*sigma + mean)) & (df[lq_param] > (mean - 2*sigma)) - dep_mask = (df[cal_energy_param] < 1595) & (df[cal_energy_param] > 1590) + lq_mask = (df[lq_param] < (2 * sigma + mean)) & ( + df[lq_param] > (mean - 2 * sigma) + ) + dep_mask = (df[cal_energy_param] < 1595) & (df[cal_energy_param] > 1590) - ids = np.isnan(df[lq_param]) | np.isnan(df['dt_eff']) - result = linregress(df['dt_eff'][~ids & dep_mask & lq_mask], df[lq_param][~ids & dep_mask & lq_mask], alternative = 'greater') + ids = np.isnan(df[lq_param]) | np.isnan(df["dt_eff"]) + result = linregress( + df["dt_eff"][~ids & dep_mask & lq_mask], + df[lq_param][~ids & dep_mask & lq_mask], + alternative="greater", + ) self.dt_fit_pars = result - df['LQ_Classifier'] = df[lq_param] - df['dt_eff']*self.dt_fit_pars[0] - self.dt_fit_pars[1] - + df["LQ_Classifier"] = ( + df[lq_param] - df["dt_eff"] * self.dt_fit_pars[0] - self.dt_fit_pars[1] + ) + except: log.error("LQ drift time correction failed") self.dt_fit_pars = (np.nan, np.nan) - self.update_cal_dicts( { - 'LQ_Classifier': { + "LQ_Classifier": { "expression": f"{lq_param} - dt_eff*a - b", - "parameters": {"a": self.dt_fit_pars[0], "b": self.dt_fit_pars[1]} + "parameters": {"a": self.dt_fit_pars[0], "b": self.dt_fit_pars[1]}, } } ) def get_cut_lq_dep(self, df: pd.DataFrame(), lq_param: str, cal_energy_param: str): - """ Determines the cut value for LQ. Value is calculated by fitting the LQ distribution for events in the DEP to a gaussian. The cut value is set at 3*sigma of the fit. Sideband subtraction is used to determine the LQ distribution for DEP events. Events greater than the cut value fail the cut. 
""" - + log.info("Starting LQ Cut calculation") try: - pars, errs, hist, bins = binned_lq_fit(df, 'LQ_Classifier', cal_energy_param, peak = 1592.5) - cut_val = 3*pars[1] + pars, errs, hist, bins = binned_lq_fit( + df, "LQ_Classifier", cal_energy_param, peak=1592.5 + ) + cut_val = 3 * pars[1] self.cut_fit_pars = pars self.cut_fit_errs = errs self.fit_hist = (hist, bins) self.cut_val = cut_val - df['LQ_Cut'] = df[lq_param] < self.cut_val + df["LQ_Cut"] = df[lq_param] < self.cut_val except: log.error("LQ cut determination failed") self.cut_val = np.nan - + self.update_cal_dicts( { "LQ_Cut": { @@ -486,7 +503,7 @@ def get_cut_lq_dep(self, df: pd.DataFrame(), lq_param: str, cal_energy_param: st } } ) - + def get_results_dict(self): return { "cal_energy_param": self.cal_energy_param, @@ -504,32 +521,33 @@ def fill_plot_dict(self, data, plot_dict={}): else: plot_dict[key] = item["function"](self, data) return plot_dict - + def calibrate(self, df, initial_lq_param): - - """ Run the LQ calibration and calculate the cut value """ - - self.lq_timecorr(df, lq_param = 'LQ_Ecorr') - log.info('Finished LQ Time Correction') - - self.drift_time_correction(df, lq_param = 'LQ_Timecorr', cal_energy_param = self.cal_energy_param) - log.info('Finished LQ Drift Time Correction') - - self.get_cut_lq_dep(df, lq_param = 'LQ_Classifier', cal_energy_param = self.cal_energy_param) - log.info('Finished Calculating the LQ Cut Value') - - final_lq_param = 'LQ_Classifier' + """Run the LQ calibration and calculate the cut value""" + + self.lq_timecorr(df, lq_param="LQ_Ecorr") + log.info("Finished LQ Time Correction") + + self.drift_time_correction( + df, lq_param="LQ_Timecorr", cal_energy_param=self.cal_energy_param + ) + log.info("Finished LQ Drift Time Correction") + + self.get_cut_lq_dep( + df, lq_param="LQ_Classifier", cal_energy_param=self.cal_energy_param + ) + log.info("Finished Calculating the LQ Cut Value") + + final_lq_param = "LQ_Classifier" peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] self.low_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] self.low_side_peak_dfs = {} - + log.info("Calculating peak survival fractions") for i, peak in enumerate(peaks_of_interest): try: - select_df = df.query( - f"{self.selection_string}" - ) + select_df = df.query(f"{self.selection_string}") fwhm = self.eres_func(peak) if peak == 2039: emin = 2 * fwhm @@ -544,8 +562,8 @@ def calibrate(self, df, initial_lq_param): self.cut_val, peak, fwhm, - cut_range = (0, 0.6), - mode = 'less', + cut_range=(0, 0.6), + mode="less", ) self.low_side_sf = pd.concat( [ @@ -565,8 +583,8 @@ def calibrate(self, df, initial_lq_param): self.cut_val, peak, fwhm, - cut_range = (0, 0.6), - mode = 'less', + cut_range=(0, 0.6), + mode="less", ) self.low_side_sf = pd.concat( [ @@ -583,18 +601,15 @@ def calibrate(self, df, initial_lq_param): pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]), ] ) - log.error( - f"LQ Survival fraction determination failed for {peak} peak" - ) + log.error(f"LQ Survival fraction determination failed for {peak} peak") self.low_side_sf.set_index("peak", inplace=True) def plot_lq_mean_time( lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12 ) -> plt.figure: - - """ Plots the mean LQ value calculated for each given timestamp """ - + """Plots the mean LQ value calculated for each given timestamp""" + plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, ax = plt.subplots(1, 1) @@ -614,28 
+629,19 @@ def plot_lq_mean_time( for tstamp, cal_dict in lq_class.cal_dicts.items() ] ax.step( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in lq_class.cal_dicts - ], + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in lq_class.cal_dicts], grouped_means, where="post", ) ax.fill_between( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in lq_class.cal_dicts - ], + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in lq_class.cal_dicts], y1=np.array(grouped_means) - 0.2 * np.array(lq_class.timecorr_df["res"]), y2=np.array(grouped_means) + 0.2 * np.array(lq_class.timecorr_df["res"]), color="green", alpha=0.2, ) ax.fill_between( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in lq_class.cal_dicts - ], + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in lq_class.cal_dicts], y1=np.array(grouped_means) - 0.4 * np.array(lq_class.timecorr_df["res"]), y2=np.array(grouped_means) + 0.4 * np.array(lq_class.timecorr_df["res"]), color="yellow", @@ -650,97 +656,103 @@ def plot_lq_mean_time( plt.close() return fig + def plot_drift_time_correction( lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12 ) -> plt.figure: - - """ Plots a 2D histogram of LQ versus effective drift time in a 6 keV - window around the DEP. Additioanlly plots the fit results for the - drift time correction. """ - + """Plots a 2D histogram of LQ versus effective drift time in a 6 keV + window around the DEP. Additioanlly plots the fit results for the + drift time correction.""" + plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, ax = plt.subplots(1, 1) - + try: - dep_range = (1590, 1595) - initial_df = data[(data[lq_class.cal_energy_param] > dep_range[0]) & (data[lq_class.cal_energy_param] < dep_range[1])] + initial_df = data[ + (data[lq_class.cal_energy_param] > dep_range[0]) + & (data[lq_class.cal_energy_param] < dep_range[1]) + ] max_dt = 1500 max_lq = 2.5 - plt.hist2d(initial_df['dt_eff'], - initial_df[lq_param], - bins = 100, - range = ((0, max_dt), (0, max_lq)), norm = mcolors.LogNorm() - ) + plt.hist2d( + initial_df["dt_eff"], + initial_df[lq_param], + bins=100, + range=((0, max_dt), (0, max_lq)), + norm=mcolors.LogNorm(), + ) x = np.linspace(0, max_dt, 100) - model = lq_class.dt_fit_pars[0]*x + lq_class.dt_fit_pars[1] + model = lq_class.dt_fit_pars[0] * x + lq_class.dt_fit_pars[1] + + plt.plot(x, model, color="r") - plt.plot(x, model, color = 'r') + plt.xlabel("Drift Time (ns)") + plt.ylabel("LQ") - plt.xlabel('Drift Time (ns)') - plt.ylabel('LQ') + plt.title("LQ versus Drift Time for DEP") - plt.title('LQ versus Drift Time for DEP') - except: pass - - + plt.tight_layout() plt.close() return fig -def plot_lq_cut_fit( - lq_class, data, figsize=[12,8], fontsize=12 -) -> plt.figure: - - """ Plots the final histogram of LQ values for events in the - DEP, and the fit results used for determining the cut - value """ - + +def plot_lq_cut_fit(lq_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: + """Plots the final histogram of LQ values for events in the + DEP, and the fit results used for determining the cut + value""" + plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - fig, (ax1, ax2) = plt.subplots(2, 1, height_ratios = (2,1)) - + fig, (ax1, ax2) = plt.subplots(2, 1, height_ratios=(2, 1)) + try: - hist, bins = lq_class.fit_hist fit_pars = lq_class.cut_fit_pars - ax1.stairs(hist, bins, label = 'data') - xs = np.linspace(round(bins[0], 3), round(bins[-1], 3), len(bins)-1) + 
ax1.stairs(hist, bins, label="data") + xs = np.linspace(round(bins[0], 3), round(bins[-1], 3), len(bins) - 1) ls = np.sum(hist) dx = np.diff(bins) - ax1.plot(xs, pgf.gauss_pdf(xs, fit_pars[0], fit_pars[1], ls)*dx, label = 'Gaussian Fit') + ax1.plot( + xs, + pgf.gauss_pdf(xs, fit_pars[0], fit_pars[1], ls) * dx, + label="Gaussian Fit", + ) - #ax1.set_xlabel('LQ') - ax1.set_title('Fit of LQ events in DEP') + # ax1.set_xlabel('LQ') + ax1.set_title("Fit of LQ events in DEP") ax1.legend() bin_centers = (bins[:-1] + bins[1:]) / 2 - reses = (hist - (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls)*dx)) / (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls)*dx) - ax2.plot(bin_centers, reses, marker = 's', linestyle = '') - ax2.set_xlabel('LQ') - ax2.set_ylabel('residuals') - + reses = ( + hist - (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls) * dx) + ) / (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls) * dx) + ax2.plot(bin_centers, reses, marker="s", linestyle="") + ax2.set_xlabel("LQ") + ax2.set_ylabel("residuals") + except: pass - + plt.tight_layout() plt.close() - return fig - + return fig + + def plot_survival_fraction_curves( lq_class, data, figsize=[12, 8], fontsize=12 ) -> plt.figure: - - """ Plots the survival fraction curves as a function of - LQ cut values for every peak of interest """ - + """Plots the survival fraction curves as a function of + LQ cut values for every peak of interest""" + plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -774,14 +786,13 @@ def plot_survival_fraction_curves( plt.ylim([0, 105]) plt.close() return fig - - + + def plot_sf_vs_energy( lq_class, data, xrange=(900, 3000), n_bins=701, figsize=[12, 8], fontsize=12 ) -> plt.figure: - - """ Plots the survival fraction as a function of energy """ - + """Plots the survival fraction as a function of energy""" + plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -809,7 +820,8 @@ def plot_sf_vs_energy( plt.xlabel("energy (keV)") plt.ylabel("survival percentage") plt.close() - return fig + return fig + def plot_spectra( lq_class, @@ -821,9 +833,8 @@ def plot_spectra( figsize=[12, 8], fontsize=12, ) -> plt.figure: - - """ Plots a 2D histogram of the LQ classifier vs calibrated energy """ - + """Plots a 2D histogram of the LQ classifier vs calibrated energy""" + plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -935,4 +946,4 @@ def plot_classifier( plt.xlim(xrange) plt.ylim(yrange) plt.close() - return fig \ No newline at end of file + return fig From 7b1c040809a5174a7acb72e266f32d8ff0360fed Mon Sep 17 00:00:00 2001 From: Erin Engelhardt <51338203+erin717@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:56:54 -0500 Subject: [PATCH 028/191] Fix spelling errors --- src/pygama/pargen/lq_cal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py index f4ed1e492..35ad12a5d 100644 --- a/src/pygama/pargen/lq_cal.py +++ b/src/pygama/pargen/lq_cal.py @@ -273,7 +273,7 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): """ Calculates the average LQ value for DEP events for each specified run timestamp. Applies a time normalization based on the average LQ value - in the DEP accross all timestamps. + in the DEP across all timestamps. 
""" log.info("Starting LQ time correction") @@ -661,7 +661,7 @@ def plot_drift_time_correction( lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12 ) -> plt.figure: """Plots a 2D histogram of LQ versus effective drift time in a 6 keV - window around the DEP. Additioanlly plots the fit results for the + window around the DEP. Additionally plots the fit results for the drift time correction.""" plt.rcParams["figure.figsize"] = figsize From 453e4ab65feef0a0bc5d0587abf3602b4b25f918 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 8 Nov 2023 11:59:23 +0100 Subject: [PATCH 029/191] [evt] bugfix: tcm.array_id must have an integer dtype --- src/pygama/evt/tcm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/evt/tcm.py b/src/pygama/evt/tcm.py index fea54d167..be319231a 100644 --- a/src/pygama/evt/tcm.py +++ b/src/pygama/evt/tcm.py @@ -81,7 +81,7 @@ def generate_tcm_cols( for ii, array in enumerate(coin_data): array = np.array(array) array_id = array_ids[ii] if array_ids is not None else ii - array_id = np.full_like(array, array_id) + array_id = np.full_like(array, array_id, dtype=int) col_dict = {"array_id": array_id, "coin_data": array} if array_idxs is not None: col_dict["array_idx"] = array_idxs[ii] From d2739656feb1263c4df8fc8d7b602c4c2a7c4176 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 8 Nov 2023 11:59:56 +0100 Subject: [PATCH 030/191] [evt] [tests] add some basic build_tcm tests --- tests/evt/test_build_tcm.py | 72 +++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 tests/evt/test_build_tcm.py diff --git a/tests/evt/test_build_tcm.py b/tests/evt/test_build_tcm.py new file mode 100644 index 000000000..505196825 --- /dev/null +++ b/tests/evt/test_build_tcm.py @@ -0,0 +1,72 @@ +import os + +import lgdo +import numpy as np +from lgdo import LH5Store + +from pygama import evt + + +def test_generate_tcm_cols(lgnd_test_data): + f_raw = lgnd_test_data.get_path( + "lh5/prod-ref-l200/generated/tier/raw/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_raw.lh5" + ) + tables = lgdo.ls(f_raw) + store = LH5Store() + coin_data = [] + for tbl in tables: + ts, _ = store.read_object(f"{tbl}/raw/timestamp", f_raw) + coin_data.append(ts) + + tcm_cols = evt.generate_tcm_cols( + coin_data, 0, "last", [int(tb[2:]) for tb in tables] + ) + assert isinstance(tcm_cols, dict) + for v in tcm_cols.values(): + assert np.issubdtype(v.dtype, np.integer) + + # fmt: off + assert np.array_equal( + tcm_cols["cumulative_length"], + [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + ], + ) + assert np.array_equal( + tcm_cols["array_id"], + [ + 1084804, 1084803, 1121600, 1084804, 1121600, 1084804, 1121600, + 1084804, 1084804, 1084804, 1084803, 1084804, 1084804, 1121600, + 1121600, 1084804, 1121600, 1084804, 1121600, 1084803, 1084803, + 1121600, 1121600, 1121600, 1084803, 1084803, 1084803, 1084803, + 1084803, 1084803, + ], + ) + assert np.array_equal( + tcm_cols["array_idx"], + [ + 0, 0, 0, 1, 1, 2, 2, 3, 4, 5, 1, 6, 7, 3, 4, 8, 5, 9, 6, 2, 3, 7, + 8, 9, 4, 5, 6, 7, 8, 9, + ], + ) + # fmt: on + + +def test_build_tcm(lgnd_test_data, tmptestdir): + f_raw = lgnd_test_data.get_path( + "lh5/prod-ref-l200/generated/tier/raw/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_raw.lh5" + ) + out_file = f"{tmptestdir}/pygama-test-tcm.lh5" + evt.build_tcm( + [(f_raw, ["ch1084803/raw", "ch1084804/raw", "ch1121600/raw"])], + "timestamp", + out_file=out_file, + 
out_name="hardware_tcm", + wo_mode="of", + ) + assert os.path.exists(out_file) + store = LH5Store() + obj, n_rows = store.read_object("hardware_tcm", out_file) + assert isinstance(obj, lgdo.Struct) + assert list(obj.keys()) == ["cumulative_length", "array_id", "array_idx"] From 9d21d3778cb1acfdd2b61ad6aeeb30c8b7efc0ff Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 8 Nov 2023 12:05:10 +0100 Subject: [PATCH 031/191] [evt] array_idx must be made of integers too --- src/pygama/evt/tcm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/evt/tcm.py b/src/pygama/evt/tcm.py index be319231a..ad87d7a47 100644 --- a/src/pygama/evt/tcm.py +++ b/src/pygama/evt/tcm.py @@ -84,7 +84,7 @@ def generate_tcm_cols( array_id = np.full_like(array, array_id, dtype=int) col_dict = {"array_id": array_id, "coin_data": array} if array_idxs is not None: - col_dict["array_idx"] = array_idxs[ii] + col_dict["array_idx"] = array_idxs.astype(int)[ii] dfs.append(pd.DataFrame(col_dict, copy=False)) # don't copy the data! # concat and sort From fd7d535c177a318f30823398774185c13d508eec Mon Sep 17 00:00:00 2001 From: valerioda Date: Fri, 17 Nov 2023 14:21:19 +0100 Subject: [PATCH 032/191] added pargen routine for filter optimization based on fft data --- src/pygama/pargen/noise_optimization.py | 383 ++++++++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 src/pygama/pargen/noise_optimization.py diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py new file mode 100644 index 000000000..c378cf5d9 --- /dev/null +++ b/src/pygama/pargen/noise_optimization.py @@ -0,0 +1,383 @@ +""" +This module contains the functions for performing the filter optimisation. +This happens with a grid search performed on ENC peak. +""" + +import inspect +import json +import logging +import os +import pathlib +import pickle as pkl +import sys +import time +from collections import namedtuple + +import lgdo +import lgdo.lh5_store as lh5 +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import scipy.stats +from iminuit import Minuit, cost, util +from matplotlib.backends.backend_pdf import PdfPages +from matplotlib.colors import LogNorm +from scipy.interpolate import splev, splrep +from scipy.optimize import minimize + +import pygama.math.peak_fitting as pgf +from pygama.math.histogram import get_hist +from pygama.pargen.dsp_optimize import run_one_dsp + +log = logging.getLogger(__name__) +sto = lh5.LH5Store() + + +def noise_optimization( + raw_list: list[str], + dsp_proc_chain: dict, + par_dsp: dict, + opt_dict: dict, + lh5_path: str, + verbose: bool = False, + display: int = 0, +) -> dict: + """ + This function calculates the optimal filter par. 
+
+    Parameters
+    ----------
+    raw_list : list[str]
+        raw files to run the macro on
+    dsp_proc_chain: str
+        Path to minimal dsp config file
+    par_dsp: dict
+        Dictionary with default dsp parameters
+    opt_dict: dict
+        Dictionary with parameters for optimization
+    lh5_path: str
+        Name of channel to process, should be name of lh5 group in raw files
+
+    Returns
+    -------
+    res_dict : dict
+        Dictionary with the optimal filter parameters and their uncertainties
+    """
+
+    t0 = time.time()
+    tb_data = load_data(raw_list, lh5_path, n_events=opt_dict["n_events"])
+    t1 = time.time()
+    log.info(f"Time to open raw files {t1-t0:.2f} s")
+    if verbose: print(f"Time to open raw files {t1-t0:.2f} s")
+
+    with open(dsp_proc_chain) as r:
+        dsp_proc_chain = json.load(r)
+
+    samples = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step"])
+    samples_val = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step_val"])
+
+    opt_dict_par = opt_dict["optimization"]
+
+    res_dict = {}
+    if display > 0:
+        plot_dict = {}
+        plot_dict["nopt"] = {}
+    ene_pars = [par for par in opt_dict_par.keys()]
+    for ene_par in ene_pars:
+        log.info(f"\nRunning optimization for {ene_par} filter")
+        if verbose: print(f"\nRunning optimization for {ene_par} filter")
+        wf_par = opt_dict_par[ene_par]["waveform_out"]
+        dict_str = opt_dict_par[ene_par]["dict_str"]
+        filter_par = opt_dict_par[ene_par]["filter_par"]
+        ene_str = opt_dict_par[ene_par]["ene_str"]
+        if display > 0:
+            plot_dict["nopt"][dict_str] = {}
+            par_dict_plot = plot_dict["nopt"][dict_str]
+
+        dsp_proc_chain["outputs"] = [ene_str]
+        sample_list, fom_list, fom_err_list = [], [], []
+        for i, x in enumerate(samples):
+            x = f"{x:.1f}"
+            log.info(f"\nCase {i}, par = {x} us")
+            if verbose: print(f"\nCase {i}, par = {x} us")
+            par_dsp[lh5_path][dict_str][filter_par] = f"{x}*us"
+
+            t2 = time.time()
+            dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path])
+            log.info(f"Time to process dsp data {time.time()-t2:.2f} s")
+            if verbose: print(f"Time to process dsp data {time.time()-t2:.2f} s")
+            energies = dsp_data[ene_str].nda
+
+            if opt_dict["perform_fit"]:
+                fom_results = simple_gaussian_fit(energies, dx=opt_dict["dx"])
+            else:
+                fom_results = calculate_spread(
+                    energies,
+                    opt_dict["percentile_low"],
+                    opt_dict["percentile_high"],
+                    opt_dict["n_bootstrap_samples"],
+                )
+            sample_list.append(float(x))
+            fom_list.append(fom_results["fom"])
+            fom_err_list.append(fom_results["fom_err"])
+            if display > 0:
+                par_dict_plot[x] = {}
+                par_dict_plot[x]["energies"] = energies
+                par_dict_plot[x]["fom"] = fom_results["fom"]
+                par_dict_plot[x]["fom_err"] = fom_results["fom_err"]
+        sample_list = np.array(sample_list)
+        fom_list = np.array(fom_list)
+        fom_err_list = np.array(fom_err_list)
+
+        guess_par = sample_list[np.nanargmin(fom_list)]
+        if verbose: print(f"guess par: {guess_par:.2f} us")
+
+        tck = splrep(sample_list, fom_list, k=opt_dict["fit_deg"])
+
+        def spl_func(x_val):
+            return splev(x_val, tck)
+
+        result = minimize(spl_func, guess_par)
+        best_par = result.x[0]
+        if (best_par < np.min(sample_list)) or (best_par > np.max(sample_list)):
+            log.info(f"Par from minimization not accepted {best_par:.2f}, setting par to guess")
+            if verbose:
+                print(f"Par from minimization not accepted {best_par:.2f}, setting par to guess")
+            best_par = guess_par
+
+        best_val = spl_func(best_par)
+
+        b_best_pars = np.zeros(opt_dict["n_bootstrap_samples"])
+        for i in range(opt_dict["n_bootstrap_samples"]):
+            indices = np.random.choice(len(sample_list), len(sample_list), replace=True)
+            b_sample_list = sample_list[indices]
+            b_fom_list = fom_list[indices]
+            
b_best_pars[i] = b_sample_list[np.nanargmin(b_fom_list)] + best_par_err = np.std(b_best_pars) + log.info(f"best par: {best_par:.2f} ± {best_par_err:.2f} us") + if verbose: + print(f"best par: {best_par:.2f} ± {best_par_err:.2f} us") + + par_dict_plot["best_par"] = best_par + par_dict_plot["best_par_err"] = best_par_err + par_dict_plot["best_val"] = best_val + + res_dict[dict_str] = { + filter_par: f"{best_par:.2f}*us", + f"{filter_par}_err": f"{best_par_err:.2f}*us", + } + + if display > 0: + plot_range = opt_dict["plot_range"] + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for i, x in enumerate(sample_list): + x = f"{x:.1f}" + energies = par_dict_plot[x]["energies"] + par_dict_plot[x].pop("energies") + hist, bins, var = get_hist( + energies, range=plot_range, dx=opt_dict["dx"] + ) + bc = (bins[:-1] + bins[1:]) / 2.0 + string_res = ( + f"par = {x} us, FOM = {fom_list[i]:.3f} ± {fom_err_list[i]:.3f} ADC" + ) + ax.plot(bc, hist, ds="steps", label=string_res) + log.info(string_res) + if verbose: print(string_res) + ax.set_xlabel("energy (ADC)") + ax.set_ylabel("counts") + ax.legend(loc="upper right") + par_dict_plot["distribution"] = fig + if display > 1: + plt.show() + else: + plt.close() + + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + ax.errorbar( + sample_list, + fom_list, + yerr=fom_err_list, + color="b", + fmt="x", + ms=4, + ls="", + capsize=4, + label="samples", + ) + ax.plot(samples_val, spl_func(samples_val), "k:", label="fit") + ax.errorbar( + best_par, + best_val, + xerr=best_par_err, + color="r", + fmt="o", + ms=6, + ls="", + capsize=4, + label=rf"best par: {best_par:.2f} ± {best_par_err:.2f} $\mu$s", + ) + ax.set_xlabel(rf"{ene_par} parameter ($\mu$s)") + ax.set_ylabel("FOM (ADC)") + ax.legend() + if display > 1: + plt.show() + else: + plt.close() + par_dict_plot["optimization"] = fig + + if display > 0: + return res_dict, plot_dict + else: + return res_dict + + +def load_data( + raw_list: list[str], + lh5_path: str, + bls: bool = True, + n_events: int = 10000, + threshold: int = 200, +) -> lgdo.Table: + sto = lh5.LH5Store() + + energies = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_list)[0] + + if bls: + idxs = np.where(energies.nda == 0)[0] + else: + idxs = np.where(energies.nda > threshold)[0] + + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_list, n_rows=n_events, idx=idxs + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_list, n_rows=n_events, idx=idxs + )[0] + + tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) + + return tb_data + + +def calculate_spread(energies, percentile_low, percentile_high, n_samples): + spreads = np.zeros(n_samples) + for i in range(n_samples): + resampled = np.random.choice(energies, size=len(energies), replace=True) + spread = np.percentile(resampled, percentile_high) - np.percentile( + resampled, percentile_low + ) + spreads[i] = spread + + mean_spread = np.mean(spreads) + std_spread = np.std(spreads, ddof=1) / np.sqrt(n_samples) + + results = {} + results["fom"] = mean_spread + results["fom_err"] = std_spread + return results + + +def simple_gaussian_fit(energies, dx=1, sigma_thr=4, allowed_p_val=1e-20): + fit_range = [np.percentile(energies, 0.2), np.percentile(energies, 99.8)] + + hist, bins, var = get_hist(energies, range=fit_range, dx=dx) + guess, bounds = simple_gaussian_guess(hist, bins, pgf.extended_gauss_pdf) + fit_range = [guess[1] - sigma_thr * guess[2], guess[1] + sigma_thr * guess[2]] + + energies_fit = energies[(energies > 
fit_range[0]) & (energies < fit_range[1])] + pars, errs, cov = pgf.fit_unbinned( + pgf.extended_gauss_pdf, + energies_fit, + guess=guess, + bounds=bounds, + ) + + mu, mu_err = pars[1], errs[1] + fwhm = pars[2] * 2 * np.sqrt(2 * np.log(2)) + fwhm_err = errs[2] * 2 * np.sqrt(2 * np.log(2)) + + hist, bins, var = get_hist(energies_fit, range=fit_range, dx=dx) + gof_pars = pars + gof_pars[0] *= dx + chisq, dof = pgf.goodness_of_fit( + hist, bins, None, pgf.gauss_pdf, gof_pars, method="Pearson" + ) + p_val = scipy.stats.chi2.sf(chisq, dof + len(gof_pars)) + + if ( + sum(sum(c) if c is not None else 0 for c in cov[:3, :][:, :3]) == np.inf + or sum(sum(c) if c is not None else 0 for c in cov[:3, :][:, :3]) == 0 + or np.isnan(sum(sum(c) if c is not None else 0 for c in cov[:3, :][:, :3])) + ): + log.debug("fit failed, cov estimation failed") + fit_failed = True + elif (np.abs(np.array(errs)[:3] / np.array(pars)[:3]) < 1e-7).any() or np.isnan( + np.array(errs)[:3] + ).any(): + log.debug("fit failed, parameter error too low") + fit_failed = True + elif p_val < allowed_p_val or np.isnan(p_val): + log.debug("fit failed, parameter error too low") + fit_failed = True + else: + fit_failed = False + + if fit_failed: + log.debug(f"Returning values from guess") + mu = guess[1] + mu_err = 0 + fwhm = guess[2] * 2 * np.sqrt(2 * np.log(2)) + fwhm_err = 0 + + results = { + "pars": pars[:3], + "errors": errs[:3], + "covariance": cov[:3], + "mu": mu, + "mu_err": mu_err, + "fom": fwhm, + "fom_err": fwhm_err, + "chisq": chisq / dof, + "p_val": p_val, + } + return results + + +def simple_gaussian_guess(hist, bins, func, toll=0.2): + max_idx = np.argmax(hist) + mu = bins[max_idx] + max_amp = np.max(hist) + + idx = np.where(hist > max_amp / 2) + ilo, ihi = idx[0][0], idx[0][-1] + + sigma = (bins[ihi] - bins[ilo]) / 2.355 + + if sigma == 0: + log.debug("error in sigma evaluation, using 2*(bin width) as sigma") + sigma = 2 * (bins[1] - bins[0]) + + dx = np.diff(bins)[0] + n_bins_range = int((4 * sigma) // dx) + + min_idx = max_idx - n_bins_range + max_idx = max_idx + n_bins_range + min_idx = max(0, min_idx) + max_idx = min(len(hist), max_idx) + + n_sig = np.sum(hist[min_idx:max_idx]) + + guess = [n_sig, mu, sigma] + bounds = [ + (n_sig + n_sig * toll, n_sig + n_sig * toll), + (mu - sigma, mu + sigma), + (sigma - sigma * toll, sigma + sigma * toll), + ] + + for i, par in enumerate(inspect.getfullargspec(func)[0][1:]): + if par == "lower_range" or par == "upper_range": + guess.append(np.inf) + bounds.append(None) + elif par == "n_bkg" or par == "hstep" or par == "components": + guess.append(0) + bounds.append(None) + return guess, bounds From 4cc4f0ce8dc37bfd1d03eb586a3c1a910cff8859 Mon Sep 17 00:00:00 2001 From: Valerio Dandrea Date: Fri, 17 Nov 2023 14:58:00 +0100 Subject: [PATCH 033/191] added extended_gauss_pdf --- src/pygama/math/peak_fitting.py | 7 ++++++- src/pygama/pargen/noise_optimization.py | 22 ++++++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/pygama/math/peak_fitting.py b/src/pygama/math/peak_fitting.py index 67e07409d..0a2ad98c4 100644 --- a/src/pygama/math/peak_fitting.py +++ b/src/pygama/math/peak_fitting.py @@ -504,9 +504,14 @@ def gauss_pdf(x, mu, sigma, n_sig): """ Basic Gaussian pdf args; mu, sigma, n_sig (number of signal events) """ - return n_sig * gauss_norm(x,mu,sigma) +def extended_gauss_pdf(x, mu, sigma, n_sig): + """ + Basic Gaussian pdf args; mu, sigma, n_sig (number of signal events) + """ + return n_sig, gauss_norm(x,mu,sigma) + def gauss_uniform(x, 
n_sig, mu, sigma, n_bkg, components = False): """ diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index c378cf5d9..61c63252a 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -81,7 +81,7 @@ def noise_optimization( plot_dict = {} plot_dict["nopt"] = {} ene_pars = [par for par in opt_dict_par.keys()] - for ene_par in ene_pars: + for ene_par in ene_pars[:1]: log.info(f"\nRunning optimization for {ene_par} filter") if verbose: print(f"\nRunning optimization for {ene_par} filter") wf_par = opt_dict_par[ene_par]["waveform_out"] @@ -224,6 +224,8 @@ def spl_func(x_val): plt.close() par_dict_plot["optimization"] = fig + log.info(f"Time to complete the optimization {time.time()-t0:.2f} s") + if verbose: print(f"Time to complete the optimization {time.time()-t0:.2f} s") if display > 0: return res_dict, plot_dict else: @@ -281,7 +283,7 @@ def simple_gaussian_fit(energies, dx=1, sigma_thr=4, allowed_p_val=1e-20): hist, bins, var = get_hist(energies, range=fit_range, dx=dx) guess, bounds = simple_gaussian_guess(hist, bins, pgf.extended_gauss_pdf) - fit_range = [guess[1] - sigma_thr * guess[2], guess[1] + sigma_thr * guess[2]] + fit_range = [guess[0] - sigma_thr * guess[1], guess[0] + sigma_thr * guess[1]] energies_fit = energies[(energies > fit_range[0]) & (energies < fit_range[1])] pars, errs, cov = pgf.fit_unbinned( @@ -291,13 +293,13 @@ def simple_gaussian_fit(energies, dx=1, sigma_thr=4, allowed_p_val=1e-20): bounds=bounds, ) - mu, mu_err = pars[1], errs[1] - fwhm = pars[2] * 2 * np.sqrt(2 * np.log(2)) - fwhm_err = errs[2] * 2 * np.sqrt(2 * np.log(2)) + mu, mu_err = pars[0], errs[0] + fwhm = pars[1] * 2 * np.sqrt(2 * np.log(2)) + fwhm_err = errs[1] * 2 * np.sqrt(2 * np.log(2)) hist, bins, var = get_hist(energies_fit, range=fit_range, dx=dx) gof_pars = pars - gof_pars[0] *= dx + gof_pars[2] *= dx chisq, dof = pgf.goodness_of_fit( hist, bins, None, pgf.gauss_pdf, gof_pars, method="Pearson" ) @@ -323,9 +325,9 @@ def simple_gaussian_fit(energies, dx=1, sigma_thr=4, allowed_p_val=1e-20): if fit_failed: log.debug(f"Returning values from guess") - mu = guess[1] + mu = guess[0] mu_err = 0 - fwhm = guess[2] * 2 * np.sqrt(2 * np.log(2)) + fwhm = guess[1] * 2 * np.sqrt(2 * np.log(2)) fwhm_err = 0 results = { @@ -366,11 +368,11 @@ def simple_gaussian_guess(hist, bins, func, toll=0.2): n_sig = np.sum(hist[min_idx:max_idx]) - guess = [n_sig, mu, sigma] + guess = [mu, sigma, n_sig] bounds = [ - (n_sig + n_sig * toll, n_sig + n_sig * toll), (mu - sigma, mu + sigma), (sigma - sigma * toll, sigma + sigma * toll), + (n_sig + n_sig * toll, n_sig + n_sig * toll), ] for i, par in enumerate(inspect.getfullargspec(func)[0][1:]): From d211bc07df08a1f9158d46433fc6cc259ab5962a Mon Sep 17 00:00:00 2001 From: valerioda Date: Fri, 17 Nov 2023 15:00:03 +0100 Subject: [PATCH 034/191] minor change --- src/pygama/pargen/noise_optimization.py | 29 +++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index 61c63252a..0003893a3 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -66,7 +66,8 @@ def noise_optimization( tb_data = load_data(raw_list, lh5_path, n_events=opt_dict["n_events"]) t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s") - if verbose: print(f"Time to open raw files {t1-t0:.2f} s") + if verbose: + print(f"Time to open raw files {t1-t0:.2f} s") 
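+    # the baselines (tb_data) and the DSP chain are read once here and reused
+    # for every point of the filter-parameter grid below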
with open(dsp_proc_chain) as r: dsp_proc_chain = json.load(r) @@ -83,7 +84,8 @@ def noise_optimization( ene_pars = [par for par in opt_dict_par.keys()] for ene_par in ene_pars[:1]: log.info(f"\nRunning optimization for {ene_par} filter") - if verbose: print(f"\nRunning optimization for {ene_par} filter") + if verbose: + print(f"\nRunning optimization for {ene_par} filter") wf_par = opt_dict_par[ene_par]["waveform_out"] dict_str = opt_dict_par[ene_par]["dict_str"] filter_par = opt_dict_par[ene_par]["filter_par"] @@ -97,13 +99,15 @@ def noise_optimization( for i, x in enumerate(samples): x = f"{x:.1f}" log.info(f"\nCase {i}, par = {x} us") - if verbose: print(f"\nCase {i}, par = {x} us") + if verbose: + print(f"\nCase {i}, par = {x} us") par_dsp[lh5_path][dict_str][filter_par] = f"{x}*us" t2 = time.time() dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) log.info(f"Time to process dsp data {time.time()-t2:.2f} s") - if verbose: print(f"Time to process dsp data {time.time()-t2:.2f} s") + if verbose: + print(f"Time to process dsp data {time.time()-t2:.2f} s") energies = dsp_data[ene_str].nda if opt_dict["perform_fit"]: @@ -128,7 +132,8 @@ def noise_optimization( fom_err_list = np.array(fom_err_list) guess_par = sample_list[np.nanargmin(fom_list)] - if verbose: print(f"guess par: {guess_par:.2f} us") + if verbose: + print(f"guess par: {guess_par:.2f} us") tck = splrep(sample_list, fom_list, k=opt_dict["fit_deg"]) @@ -138,9 +143,13 @@ def spl_func(x_val): result = minimize(spl_func, guess_par) best_par = result.x[0] if (best_par < np.min(sample_list)) or (best_par > np.max(sample_list)): - log.info(f"Par from minimization not accepted {best_par:.2f}, setting par to guess") + log.info( + f"Par from minimization not accepted {best_par:.2f}, setting par to guess" + ) if verbose: - print(f"Par from minimization not accepted {best_par:.2f}, setting par to guess") + print( + f"Par from minimization not accepted {best_par:.2f}, setting par to guess" + ) best_par = guess_par best_val = spl_func(best_par) @@ -181,7 +190,8 @@ def spl_func(x_val): ) ax.plot(bc, hist, ds="steps", label=string_res) log.info(string_res) - if verbose: print(string_res) + if verbose: + print(string_res) ax.set_xlabel("energy (ADC)") ax.set_ylabel("counts") ax.legend(loc="upper right") @@ -225,7 +235,8 @@ def spl_func(x_val): par_dict_plot["optimization"] = fig log.info(f"Time to complete the optimization {time.time()-t0:.2f} s") - if verbose: print(f"Time to complete the optimization {time.time()-t0:.2f} s") + if verbose: + print(f"Time to complete the optimization {time.time()-t0:.2f} s") if display > 0: return res_dict, plot_dict else: From ab48f47b7b0d4b82afdbd4c5e2b9442bc8ff0b6c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 17 Nov 2023 18:03:48 +0100 Subject: [PATCH 035/191] changes to loading for trimming --- src/pygama/pargen/energy_optimisation.py | 33 ++++++++++------------ src/pygama/pargen/utils.py | 35 ++++++++++++++++-------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index b5d59278b..a6049ae14 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -894,15 +894,15 @@ def get_wf_indexes(sorted_indexs, n_events): return out_list -def index_data(data, indexes): +def index_data(data, indexes, wf_field="waveform"): new_baselines = lh5.Array(data["baseline"].nda[indexes]) - new_waveform_values = data["waveform"]["values"].nda[indexes] - 
new_waveform_dts = data["waveform"]["dt"].nda[indexes] - new_waveform_t0 = data["waveform"]["t0"].nda[indexes] + new_waveform_values = data[wf_field]["values"].nda[indexes] + new_waveform_dts = data[wf_field]["dt"].nda[indexes] + new_waveform_t0 = data[wf_field]["t0"].nda[indexes] new_waveform = lh5.WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = lh5.Table(col_dict={"waveform": new_waveform, "baseline": new_baselines}) + new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) return new_data @@ -954,15 +954,15 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = threshold / guess_keV * 0.6 + Euc_min = threshold / guess_keV Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 # / guess_keV + dEuc = 5 / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( hist, bins, var, - np.array([238.632, 583.191, 727.330, 860.564, 1620.5, 2103.53, 2614.553]), + np.array([238.632, 583.191, 727.330, 860.564, 1592.5, 1620.5, 2103.53, 2614.553]), ) log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") @@ -997,13 +997,9 @@ def event_selection( idx_list = get_wf_indexes(sort_index, idx_list_lens) idxs = np.array(sorted(np.concatenate(masks))) - waveforms = sto.read_object( - f"{lh5_path}/{wf_field}", raw_files, idx=idxs, n_rows=len(idxs) - )[0] - baseline = sto.read_object( - f"{lh5_path}/baseline", raw_files, idx=idxs, n_rows=len(idxs) + input_data = sto.read_object( + f"{lh5_path}", raw_files, idx=idxs, n_rows=len(idxs) )[0] - input_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline}) if isinstance(dsp_config, str): with open(dsp_config) as r: @@ -1022,6 +1018,7 @@ def event_selection( ct_mask = cts.get_cut_indexes(tb_data, cut_dict) final_events = [] + out_events = [] for peak_idx in peak_idxs: peak = peaks_keV[peak_idx] kev_width = kev_widths[peak_idx] @@ -1070,18 +1067,16 @@ def event_selection( log.info(f"lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}") final_mask = (energy > e_lower_lim) & (energy < e_upper_lim) final_events.append(peak_ids[final_mask][:n_events]) + out_events.append(idxs[final_events[-1]]) log.info(f"{len(peak_ids[final_mask][:n_events])} passed selections for {peak}") if len(peak_ids[final_mask]) < 0.5 * n_events: log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - + out_events = np.unique(np.array(out_events).flatten()) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) - idxs = np.array(sorted(np.concatenate(final_events))) - - final_data = index_data(input_data, idxs) - return final_data, idx_list + return out_events, idx_list def fwhm_slope(x, m0, m1, m2): diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index e6c9f3c75..61f9dd6cf 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -52,7 +52,18 @@ def load_data( sto = lh5.LH5Store() + out_df = pd.DataFrame(columns=params) + + if isinstance(files, dict): + + keys = lh5.ls(files[list(files)[0]][0], lh5_path if lh5_path[-1] == "/" else lh5_path + "/") + keys = [key.split("/")[-1] for key in keys] + if list(files)[0] in cal_dict: + params = get_params(keys + 
list(cal_dict[list(files)[0]].keys()), params) + else: + params = get_params(keys + list(cal_dict.keys()), params) + df = [] all_files = [] masks = np.array([], dtype=bool) @@ -64,9 +75,11 @@ def load_data( file_df = table.eval(cal_dict).get_dataframe() file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("run_timestamp") + for param in params: + if param not in df: + file_df[param] = lh5.load_nda(tfiles, [param], lh5_path)[param] if threshold is not None: - mask = file_df[cal_energy_param] < threshold - + mask = file_df[cal_energy_param] > threshold file_df.drop(np.where(mask)[0], inplace=True) else: mask = np.zeros(len(file_df), dtype=bool) @@ -77,8 +90,16 @@ def load_data( df = pd.concat(df) elif isinstance(files, list): + + keys = lh5.ls(files[0], lh5_path if lh5_path[-1] == "/" else lh5_path + "/") + keys = [key.split("/")[-1] for key in keys] + params = get_params(keys + list(cal_dict.keys()), params) + table = sto.read_object(lh5_path, files)[0] df = table.eval(cal_dict).get_dataframe() + for param in params: + if param not in df: + df[param] = lh5.load_nda(files, [param], lh5_path)[param] if threshold is not None: masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) @@ -86,20 +107,10 @@ def load_data( masks = np.ones(len(df), dtype=bool) all_files = files - if lh5_path[-1] != "/": - lh5_path += "/" - keys = lh5.ls(all_files[0], lh5_path) - keys = [key.split("/")[-1] for key in keys] - params = get_params(keys + list(df.keys()), params) - for col in list(df.keys()): if col not in params: df.drop(col, inplace=True, axis=1) - param_dict = {} - for param in params: - if param not in df: - df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") if return_selection_mask: return df, masks From 25c329e17451d1d34d80f0e22dcac359c7ea80ba Mon Sep 17 00:00:00 2001 From: valerioda Date: Sat, 18 Nov 2023 18:08:24 +0100 Subject: [PATCH 036/191] basic cuts and function for fft calculation --- src/pygama/pargen/noise_optimization.py | 56 +++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index 0003893a3..dad9f4249 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -28,7 +28,9 @@ import pygama.math.peak_fitting as pgf from pygama.math.histogram import get_hist +from pygama.pargen.cuts import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.energy_optimisation import index_data log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -65,13 +67,21 @@ def noise_optimization( t0 = time.time() tb_data = load_data(raw_list, lh5_path, n_events=opt_dict["n_events"]) t1 = time.time() - log.info(f"Time to open raw files {t1-t0:.2f} s") + log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") if verbose: - print(f"Time to open raw files {t1-t0:.2f} s") + print(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") with open(dsp_proc_chain) as r: dsp_proc_chain = json.load(r) + dsp_data = run_one_dsp(tb_data, dsp_proc_chain) + cut_dict = generate_cuts(dsp_data, parameters=opt_dict["cut_pars"]) + idxs = get_cut_indexes(dsp_data, cut_dict) + tb_data = index_data(tb_data, idxs) + log.info(f"... {len(tb_data)} baselines after cuts") + if verbose: + print(f"... 
{len(tb_data)} baselines after cuts") + samples = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step"]) samples_val = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step_val"]) @@ -79,10 +89,23 @@ def noise_optimization( res_dict = {} if display > 0: + freq, pow_spectrum = calculate_fft(tb_data) + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + ax.plot(freq, pow_spectrum) + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlabel("frequency (MHz)", ha="right", x=1) + ax.set_ylabel(f"power spectral density", ha="right", y=1) + plot_dict = {} plot_dict["nopt"] = {} + plot_dict["nopt"]["fft"] = {} + plot_dict["nopt"]["fft"]["frequency"] = freq + plot_dict["nopt"]["fft"]["pow_spectrum"] = pow_spectrum + plot_dict["nopt"]["fft"]["fig"] = fig + ene_pars = [par for par in opt_dict_par.keys()] - for ene_par in ene_pars[:1]: + for ene_par in ene_pars: log.info(f"\nRunning optimization for {ene_par} filter") if verbose: print(f"\nRunning optimization for {ene_par} filter") @@ -265,8 +288,13 @@ def load_data( daqenergy = sto.read_object( f"{lh5_path}/raw/daqenergy", raw_list, n_rows=n_events, idx=idxs )[0] + baseline = sto.read_object( + f"{lh5_path}/raw/baseline", raw_list, n_rows=n_events, idx=idxs + )[0] - tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) + tb_data = lh5.Table( + col_dict={"waveform": waveforms, "daqenergy": daqenergy, "baseline": baseline} + ) return tb_data @@ -394,3 +422,23 @@ def simple_gaussian_guess(hist, bins, func, toll=0.2): guess.append(0) bounds.append(None) return guess, bounds + + +def calculate_fft(tb_data, cut=1): + bls = tb_data["waveform"].values.nda + nev, size = bls.shape + + sample_time_us = float(tb_data["waveform"].dt.nda[0]) / 1000 + sampling_rate = 1 / sample_time_us + fft_size = size // 2 + 1 + + frequency = np.linspace(0, sampling_rate / 2, fft_size) + power_spectrum = np.zeros(fft_size, dtype=np.float64) + + for bl in bls: + fft = np.fft.rfft(bl) + abs_fft = np.abs(fft) + power_spectrum += np.square(abs_fft) + power_spectrum /= nev + + return frequency[cut:], power_spectrum[cut:] From 2807bb555a75f940655b43a5c0e1f03f32bbfff1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:49:02 +0000 Subject: [PATCH 037/191] style: pre-commit fixes --- src/pygama/pargen/energy_optimisation.py | 12 +++++++----- src/pygama/pargen/utils.py | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index a6049ae14..24703fda2 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -956,13 +956,15 @@ def event_selection( guess_keV = 2620 / np.nanpercentile(rough_energy, 99) Euc_min = threshold / guess_keV Euc_max = 2620 / guess_keV * 1.1 - dEuc = 5 / guess_keV + dEuc = 5 / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( hist, bins, var, - np.array([238.632, 583.191, 727.330, 860.564, 1592.5, 1620.5, 2103.53, 2614.553]), + np.array( + [238.632, 583.191, 727.330, 860.564, 1592.5, 1620.5, 2103.53, 2614.553] + ), ) log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") @@ -997,9 +999,9 @@ def event_selection( idx_list = get_wf_indexes(sort_index, idx_list_lens) idxs = np.array(sorted(np.concatenate(masks))) - input_data = sto.read_object( - 
f"{lh5_path}", raw_files, idx=idxs, n_rows=len(idxs) - )[0] + input_data = sto.read_object(f"{lh5_path}", raw_files, idx=idxs, n_rows=len(idxs))[ + 0 + ] if isinstance(dsp_config, str): with open(dsp_config) as r: diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 61f9dd6cf..65a35ee7d 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -54,10 +54,11 @@ def load_data( out_df = pd.DataFrame(columns=params) - if isinstance(files, dict): - - keys = lh5.ls(files[list(files)[0]][0], lh5_path if lh5_path[-1] == "/" else lh5_path + "/") + keys = lh5.ls( + files[list(files)[0]][0], + lh5_path if lh5_path[-1] == "/" else lh5_path + "/", + ) keys = [key.split("/")[-1] for key in keys] if list(files)[0] in cal_dict: params = get_params(keys + list(cal_dict[list(files)[0]].keys()), params) @@ -90,7 +91,6 @@ def load_data( df = pd.concat(df) elif isinstance(files, list): - keys = lh5.ls(files[0], lh5_path if lh5_path[-1] == "/" else lh5_path + "/") keys = [key.split("/")[-1] for key in keys] params = get_params(keys + list(cal_dict.keys()), params) From f49ec8ccea71964011af67b073da3a0bf0081493 Mon Sep 17 00:00:00 2001 From: valerioda Date: Mon, 27 Nov 2023 22:06:54 +0100 Subject: [PATCH 038/191] using fft processor to calculate power spectral density of baselines by calling new processing chain --- src/pygama/pargen/noise_optimization.py | 143 ++++++++++++------------ 1 file changed, 71 insertions(+), 72 deletions(-) diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index dad9f4249..12695c357 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -65,15 +65,17 @@ def noise_optimization( """ t0 = time.time() - tb_data = load_data(raw_list, lh5_path, n_events=opt_dict["n_events"]) + tb_data = load_data( + raw_list, + lh5_path, + n_events=opt_dict["n_events"], + wf_field=opt_dict["wf_field"], + ) t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") if verbose: print(f"Time to open raw files {t1-t0:.2f} s, n. 
baselines {len(tb_data)}") - with open(dsp_proc_chain) as r: - dsp_proc_chain = json.load(r) - dsp_data = run_one_dsp(tb_data, dsp_proc_chain) cut_dict = generate_cuts(dsp_data, parameters=opt_dict["cut_pars"]) idxs = get_cut_indexes(dsp_data, cut_dict) @@ -89,48 +91,53 @@ def noise_optimization( res_dict = {} if display > 0: - freq, pow_spectrum = calculate_fft(tb_data) + # fft + dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) + psd = np.mean(dsp_data["wf_psd"].values.nda, axis=0) + sample_us = float(tb_data[opt_dict["wf_field"]].dt.nda[0]) / 1000 + freq = np.linspace(0, (1 / sample_us) / 2, len(psd)) fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") - ax.plot(freq, pow_spectrum) + ax.plot(freq, psd) ax.set_xscale("log") ax.set_yscale("log") - ax.set_xlabel("frequency (MHz)", ha="right", x=1) - ax.set_ylabel(f"power spectral density", ha="right", y=1) + ax.set_xlabel("frequency (MHz)") + ax.set_ylabel(f"power spectral density") plot_dict = {} plot_dict["nopt"] = {} plot_dict["nopt"]["fft"] = {} plot_dict["nopt"]["fft"]["frequency"] = freq - plot_dict["nopt"]["fft"]["pow_spectrum"] = pow_spectrum + plot_dict["nopt"]["fft"]["psd"] = psd plot_dict["nopt"]["fft"]["fig"] = fig + result_dict = {} ene_pars = [par for par in opt_dict_par.keys()] - for ene_par in ene_pars: - log.info(f"\nRunning optimization for {ene_par} filter") + log.info(f"\nRunning optimization for {ene_pars}") + if verbose: + print(f"\nRunning optimization for {ene_pars}") + for i, x in enumerate(samples): + x = f"{x:.1f}" + log.info(f"\nCase {i}, par = {x} us") if verbose: - print(f"\nRunning optimization for {ene_par} filter") - wf_par = opt_dict_par[ene_par]["waveform_out"] - dict_str = opt_dict_par[ene_par]["dict_str"] - filter_par = opt_dict_par[ene_par]["filter_par"] - ene_str = opt_dict_par[ene_par]["ene_str"] - if display > 0: - plot_dict["nopt"][dict_str] = {} - par_dict_plot = plot_dict["nopt"][dict_str] - - dsp_proc_chain["outputs"] = [ene_str] - sample_list, fom_list, fom_err_list = [], [], [] - for i, x in enumerate(samples): - x = f"{x:.1f}" - log.info(f"\nCase {i}, par = {x} us") - if verbose: - print(f"\nCase {i}, par = {x} us") + print(f"\nCase {i}, par = {x} us") + for ene_par in ene_pars: + dict_str = opt_dict_par[ene_par]["dict_str"] + filter_par = opt_dict_par[ene_par]["filter_par"] par_dsp[lh5_path][dict_str][filter_par] = f"{x}*us" - t2 = time.time() - dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) - log.info(f"Time to process dsp data {time.time()-t2:.2f} s") - if verbose: - print(f"Time to process dsp data {time.time()-t2:.2f} s") + t2 = time.time() + dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) + log.info(f"Time to process dsp data {time.time()-t2:.2f} s") + if verbose: + print(f"Time to process dsp data {time.time()-t2:.2f} s") + + for ene_par in ene_pars: + dict_str = opt_dict_par[ene_par]["dict_str"] + ene_str = opt_dict_par[ene_par]["ene_str"] + if dict_str not in result_dict: + result_dict[dict_str] = {} + par_dict_res = result_dict[dict_str] + energies = dsp_data[ene_str].nda if opt_dict["perform_fit"]: @@ -142,17 +149,28 @@ def noise_optimization( opt_dict["percentile_high"], opt_dict["n_bootstrap_samples"], ) - sample_list.append(float(x)) - fom_list.append(fom_results["fom"]) - fom_err_list.append(fom_results["fom_err"]) - if display > 0: - par_dict_plot[x] = {} - par_dict_plot[x]["energies"] = energies - par_dict_plot[x]["fom"] = fom_results["fom"] - par_dict_plot[x]["fom_err"] = 
fom_results["fom_err"] - sample_list = np.array(sample_list) - fom_list = np.array(fom_list) - fom_err_list = np.array(fom_err_list) + + par_dict_res[x] = {} + par_dict_res[x]["energies"] = energies + par_dict_res[x]["fom"] = fom_results["fom"] + par_dict_res[x]["fom_err"] = fom_results["fom_err"] + + for ene_par in ene_pars: + log.info(f"\nOptimization for {ene_par}") + if verbose: + print(f"\nOptimization for {ene_par}") + dict_str = opt_dict_par[ene_par]["dict_str"] + par_dict_res = result_dict[dict_str] + sample_list = np.array([float(x) for x in result_dict[dict_str].keys()]) + fom_list = np.array( + [result_dict[dict_str][x]["fom"] for x in result_dict[dict_str].keys()] + ) + fom_err_list = np.array( + [result_dict[dict_str][x]["fom_err"] for x in result_dict[dict_str].keys()] + ) + + print(ene_par, sample_list) + print(ene_par, fom_list) guess_par = sample_list[np.nanargmin(fom_list)] if verbose: @@ -188,9 +206,9 @@ def spl_func(x_val): if verbose: print(f"best par: {best_par:.2f} ± {best_par_err:.2f} us") - par_dict_plot["best_par"] = best_par - par_dict_plot["best_par_err"] = best_par_err - par_dict_plot["best_val"] = best_val + par_dict_res["best_par"] = best_par + par_dict_res["best_par_err"] = best_par_err + par_dict_res["best_val"] = best_val res_dict[dict_str] = { filter_par: f"{best_par:.2f}*us", @@ -202,8 +220,8 @@ def spl_func(x_val): fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") for i, x in enumerate(sample_list): x = f"{x:.1f}" - energies = par_dict_plot[x]["energies"] - par_dict_plot[x].pop("energies") + energies = par_dict_res[x]["energies"] + par_dict_res[x].pop("energies") hist, bins, var = get_hist( energies, range=plot_range, dx=opt_dict["dx"] ) @@ -218,7 +236,7 @@ def spl_func(x_val): ax.set_xlabel("energy (ADC)") ax.set_ylabel("counts") ax.legend(loc="upper right") - par_dict_plot["distribution"] = fig + par_dict_res["distribution"] = fig if display > 1: plt.show() else: @@ -255,7 +273,8 @@ def spl_func(x_val): plt.show() else: plt.close() - par_dict_plot["optimization"] = fig + par_dict_res["optimization"] = fig + plot_dict["nopt"][dict_str] = par_dict_res log.info(f"Time to complete the optimization {time.time()-t0:.2f} s") if verbose: @@ -272,6 +291,7 @@ def load_data( bls: bool = True, n_events: int = 10000, threshold: int = 200, + wf_field="waveform", ) -> lgdo.Table: sto = lh5.LH5Store() @@ -283,7 +303,7 @@ def load_data( idxs = np.where(energies.nda > threshold)[0] waveforms = sto.read_object( - f"{lh5_path}/raw/waveform", raw_list, n_rows=n_events, idx=idxs + f"{lh5_path}/raw/{wf_field}", raw_list, n_rows=n_events, idx=idxs )[0] daqenergy = sto.read_object( f"{lh5_path}/raw/daqenergy", raw_list, n_rows=n_events, idx=idxs @@ -295,7 +315,6 @@ def load_data( tb_data = lh5.Table( col_dict={"waveform": waveforms, "daqenergy": daqenergy, "baseline": baseline} ) - return tb_data @@ -422,23 +441,3 @@ def simple_gaussian_guess(hist, bins, func, toll=0.2): guess.append(0) bounds.append(None) return guess, bounds - - -def calculate_fft(tb_data, cut=1): - bls = tb_data["waveform"].values.nda - nev, size = bls.shape - - sample_time_us = float(tb_data["waveform"].dt.nda[0]) / 1000 - sampling_rate = 1 / sample_time_us - fft_size = size // 2 + 1 - - frequency = np.linspace(0, sampling_rate / 2, fft_size) - power_spectrum = np.zeros(fft_size, dtype=np.float64) - - for bl in bls: - fft = np.fft.rfft(bl) - abs_fft = np.abs(fft) - power_spectrum += np.square(abs_fft) - power_spectrum /= nev - - return frequency[cut:], power_spectrum[cut:] From 
d2ad7d2a7fdb85f369d56d4783bfcac354f7974c Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 13:01:23 +0100 Subject: [PATCH 039/191] routine to create dplms dictionary for Ge processing --- src/pygama/pargen/dplms_ge_dict.py | 726 +++++++++++++++++++++++ src/pygama/pargen/energy_optimisation.py | 55 +- 2 files changed, 754 insertions(+), 27 deletions(-) create mode 100644 src/pygama/pargen/dplms_ge_dict.py diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py new file mode 100644 index 000000000..f6d0902d9 --- /dev/null +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -0,0 +1,726 @@ +""" +This module is for creating dplms dictionary for ge processing +""" + +from __future__ import annotations + +import itertools +import json +import logging +import os +import pathlib +import pickle +import time +from collections import OrderedDict + +import lgdo +import lgdo.lh5_store as lh5 +import matplotlib.pyplot as plt +import numpy as np +from lgdo import Array +from scipy.signal import convolve, convolve2d + +from pygama.math.histogram import get_hist +from pygama.math.peak_fitting import ( + extended_gauss_step_pdf, + extended_radford_pdf, + gauss_step_pdf, + radford_pdf, +) +from pygama.pargen.cuts import find_pulser_properties, generate_cuts, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_optimisation import ( + event_selection, + fom_FWHM, + fom_FWHM_with_dt_corr_fit, + index_data, +) +from pygama.pargen.noise_optimization import calculate_spread + +log = logging.getLogger(__name__) +sto = lh5.LH5Store() + + +def dplms_ge_dict( + lh5_path: str, + fft_files: list[str], + cal_files: list[str], + dsp_config: dict, + par_dsp: dict, + par_dsp_lh5: str, + dplms_dict: dict, + decay_const: float = 0, + ene_par: str = "dplmsEmax", + display: int = 0, +) -> dict: + """ + This function calculates the dplms dictionary for HPGe detectors. + + Parameters + ---------- + lh5_path: str + Name of channel to process, should be name of lh5 group in raw files + fft_files : list[str] + raw files with fft data + cal_files : list[str] + raw files with cal data + dsp_config: dict + dsp config file + par_dsp: dict + Dictionary with db parameters for dsp processing + par_dsp_lh5: str + Path for saving dplms coefficients + dplms_dict: dict + Dictionary with various parameters + + Returns + ------- + out_dict : dict + """ + + t0 = time.time() + log.info(f"\nSelecting baselines") + raw_bls = load_data( + fft_files, + lh5_path, + "bls", + n_events=dplms_dict["n_baselines"], + raw_wf_field=dplms_dict["raw_wf_field"] + ) + + dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) + idxs = get_cut_indexes(dsp_bls, cut_dict) + bl_field = dplms_dict["bl_field"] + log.info(f"... {len(dsp_bls[bl_field].values.nda[idxs,:])} baselines after cuts") + + bls = dsp_bls[bl_field].values.nda[idxs, : dplms_dict["bsize"]] + bls_par = {} + bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()] + for par in bls_cut_pars: + bls_par[par] = dsp_bls[par].nda + t1 = time.time() + log.info( + f"total events {len(raw_bls)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" + ) + + log.info( + "\nCalculating noise matrix of length", + dplms_dict["length"], + "n. 
events", + bls.shape[0], + "size", + bls.shape[1], + ) + nmat = noise_matrix(bls, dplms_dict["length"]) + t2 = time.time() + log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s") + + log.info("\nSelecting signals") + peaks_keV = np.array(dplms_dict["peaks_keV"]) + wsize = dplms_dict["wsize"] + wf_field = dplms_dict["wf_field"] + kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + + raw_cal, idx_list = event_selection( + cal_files, + f"{lh5_path}/raw", + dsp_config, + par_dsp[lh5_path], + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + cut_parameters=dplms_dict["wfs_cut_pars"], + n_events=dplms_dict["n_signals"], + ) + t3 = time.time() + log.info(f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}") + + raw_cal = index_data(raw_cal, idx_list[-1]) + log.info(f"Produce dsp data for {len(raw_cal)} events") + dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) + t4 = time.time() + log.info(f"Time to run dsp production {(t4-t3):.2f} s") + + # minimal processing chain + with open(dsp_config) as r: + dsp_config = json.load(r) + dsp_config["outputs"] = [ene_par, "dt_eff"] + + # dictionary for peak fitting + peak_dict = { + "peak": peaks_keV[-1], + "kev_width": kev_widths[-1], + "parameter": ene_par, + "func": extended_gauss_step_pdf, + "gof_func": gauss_step_pdf, + } + + if display > 0: + plot_dict = {} + plot_dict["dplms"] = {} + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + + # penalized coefficients + dp_coeffs = dplms_dict["dp_coeffs"] + if lh5_path in dplms_dict["noisy_bl"]: + log.info("Setting explicit zero area condition") + za_coeff = dp_coeffs["za"] + else: + za_coeff = dplms_dict["dp_def"]["za"] + dp_coeffs.pop("za") + coeff_keys = [key for key in dp_coeffs.keys()] + lists = [dp_coeffs[key] for key in dp_coeffs.keys()] + + prod = list(itertools.product(*lists)) + grid_dict = {} + min_fom = float("inf") + min_idx = None + + for i, values in enumerate(prod): + coeff_values = dict(zip(coeff_keys, values)) + + log.info( + "\nCase", + i, + "->", + ", ".join(f"{key} = {value}" for key, value in coeff_values.items()), + ) + grid_dict[i] = coeff_values + + sel_dict = signal_selection(dsp_cal, dplms_dict, coeff_values) + wfs = dsp_cal[wf_field].nda[sel_dict["idxs"], :] + log.info(f"... 
{len(wfs)} signals after signal selection") + + ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const) + + t_tmp = time.time() + nm_coeff = coeff_values["nm"] + ft_coeff = coeff_values["ft"] + x, y, refy = filter_synthesis( + ref, + nm_coeff * nmat, + rmat, + za_coeff, + pmat, + ft_coeff * fmat, + dplms_dict["length"], + wsize, + ) + par_dsp[lh5_path]["dplms"] = {} + par_dsp[lh5_path]["dplms"]["length"] = dplms_dict["length"] + par_dsp[lh5_path]["dplms"]["coefficients"] = x.tolist() + log.info( + f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) + ) + + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + energies = dsp_opt[ene_par].nda + enc_results = calculate_spread(energies, 10, 90, 1000) + enc, enc_err = enc_results["fom"], enc_results["fom_err"] + log.info( + f"ENC: mean = {energies.mean():.2f} ADC, FOM = {enc:.2f} ± {enc_err:.2f} ADC, evaluated in {time.time()-t_tmp:.1f} s" + ) + grid_dict[i]["enc"] = enc + grid_dict[i]["enc_err"] = enc_err + + if display > 0: + hist, bins, var = get_hist(energies, range=(-20, 20), dx=0.1) + bc = (bins[:-1] + bins[1:]) / 2.0 + ax.plot( + bc, + hist, + ds="steps", + label=f"{ene_par} - ENC = {enc:.3f} ± {enc_err:.3f} ADC", + ) + ax.set_xlabel("energy (ADC)") + ax.set_ylabel("counts") + ax.legend(loc="upper right") + + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) + + try: + res = fom_FWHM_with_dt_corr_fit( + dsp_opt, + peak_dict, + "QDrift", + idxs=np.where(~np.isnan(dsp_opt["dt_eff"].nda))[0], + ) + except: + log.debug("FWHM not calculated") + continue + + fwhm, fwhm_err, alpha, chisquare = ( + res["fwhm"], + res["fwhm_err"], + res["alpha"], + res["chisquare"], + ) + log.info(f"FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, evaluated in {time.time()-t_tmp:.1f} s") + + grid_dict[i]["fwhm"] = fwhm + grid_dict[i]["fwhm_err"] = fwhm_err + grid_dict[i]["alpha"] = alpha + + if ( + fwhm < dplms_dict["fwhm_limit"] + and fwhm_err < dplms_dict["err_limit"] + and chisquare < dplms_dict["chi_limit"] + ): + if fwhm < min_fom: + min_idx, min_fom = i, fwhm + + if min_idx is not None: + min_result = grid_dict[min_idx] + best_case_values = {key: min_result[key] for key in min_result.keys()} + + enc = best_case_values.get("enc", None) + enc_err = best_case_values.get("enc_err", 0) + fwhm = best_case_values.get("fwhm", None) + fwhm_err = best_case_values.get("fwhm_err", 0) + alpha = best_case_values.get("alpha", 0) + nm_coeff = best_case_values.get("nm", dplms_dict["dp_def"]["nm"]) + ft_coeff = best_case_values.get("ft", dplms_dict["dp_def"]["nm"]) + rt_coeff = best_case_values.get("rt", dplms_dict["dp_def"]["rt"]) + pt_coeff = best_case_values.get("pt", dplms_dict["dp_def"]["pt"]) + + if all( + v is not None + for v in [ + enc, + enc_err, + fwhm, + fwhm_err, + alpha, + nm_coeff, + ft_coeff, + rt_coeff, + pt_coeff, + ] + ): + log.info(f"\nBest case: FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, ctc {alpha}") + else: + log.error("Some values are missing in the best case results") + else: + log.error("Filter synthesis failed") + nm_coeff = dplms_dict["dp_def"]["nm"] + ft_coeff = dplms_dict["dp_def"]["ft"] + rt_coeff = dplms_dict["dp_def"]["rt"] + pt_coeff = dplms_dict["dp_def"]["pt"] + + # filter synthesis + sel_dict = signal_selection(dsp_cal, dplms_dict, best_case_values) + idxs = sel_dict["idxs"] + wfs = dsp_cal[wf_field].nda[idxs, :] + ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const) + + x, y, refy = 
filter_synthesis( + ref, + nm_coeff * nmat, + rmat, + za_coeff, + pmat, + ft_coeff * fmat, + dplms_dict["length"], + wsize, + ) + + sto.write_object( + Array(x), + name="dplms", + lh5_file=par_dsp_lh5, + wo_mode="overwrite", + group=lh5_path, + ) + + out_dict = { + "dplms": { + "length": dplms_dict["length"], + "coefficients": f"loadlh5('{par_dsp_lh5}', '{lh5_path}/dplms')", + "dp_coeffs": { + "nm": nm_coeff, + "za": za_coeff, + "ft": ft_coeff, + "rt": rt_coeff, + "pt": pt_coeff, + }, + } + } + out_alpha_dict = { + f"{ene_par}_ctc": { + "expression": f"{ene_par}*(1+dt_eff*a)", + "parameters": {"a": round(alpha, 9)}, + } + } + out_dict.update({"ctc_params": out_alpha_dict}) + + log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}") + + if display > 0: + plot_dict["dplms"]["enc_hist"] = fig + plot_dict["dplms"]["enc"] = enc + plot_dict["dplms"]["enc_err"] = enc_err + plot_dict["dplms"]["ref"] = ref + plot_dict["dplms"]["coefficients"] = x + + bl_idxs = np.random.choice(len(bls), dplms_dict["n_plot"]) + bls = bls[bl_idxs] + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for ii, wf in enumerate(bls): + if ii < 10: + ax.plot(wf, label=f"mean = {wf.mean():.1f}") + else: + ax.plot(wf) + ax.legend(title=f"{lh5_path}", loc="upper right") + plot_dict["dplms"]["bls"] = fig + fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white") + for ii, par in enumerate(bls_cut_pars): + mean = cut_dict[par]["Mean Value"] + llo, lup = cut_dict[par]["Lower Boundary"], cut_dict[par]["Upper Boundary"] + plo, pup = mean - 2 * (mean - llo), mean + 2 * (lup - mean) + hh, bb = np.histogram(bls_par[par], bins=np.linspace(plo, pup, 200)) + ax.flat[ii].plot(bb[1:], hh, ds="steps", label=f"cut on {par}") + ax.flat[ii].axvline(lup, color="k", linestyle=":", label="selection") + ax.flat[ii].axvline(llo, color="k", linestyle=":") + ax.flat[ii].set_xlabel(par) + ax.flat[ii].set_yscale("log") + ax.flat[ii].legend(title=f"{lh5_path}", loc="upper right") + plot_dict["dplms"]["bl_sel"] = fig + + wf_idxs = np.random.choice(len(wfs), dplms_dict["n_plot"]) + wfs = wfs[wf_idxs] + peak_pos = dsp_cal["peak_pos"].nda + peak_pos_neg = dsp_cal["peak_pos_neg"].nda + centroid = dsp_cal["centroid"].nda + risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda + rt_low = dplms_dict["rt_low"] + rt_high = dplms_dict["rt_high"] + peak_lim = dplms_dict["peak_lim"] + cal_par = {} + wfs_cut_pars = [par for par in dplms_dict["wfs_cut_pars"].keys()] + for par in wfs_cut_pars: + cal_par[par] = dsp_cal[par].nda + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for ii, wf in enumerate(wfs): + if ii < 10: + ax.plot(wf, label=f"centr = {centroid[ii]}") + else: + ax.plot(wf) + ax.legend(title=f"{lh5_path}", loc="upper right") + axin = ax.inset_axes([0.1, 0.15, 0.35, 0.5]) + for wf in wfs: + axin.plot(wf) + axin.set_xlim(wsize / 2 - dplms_dict["zoom"], wsize / 2 + dplms_dict["zoom"]) + axin.set_yticklabels("") + plot_dict["dplms"]["wfs"] = fig + fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white") + wfs_cut_pars.append("centroid") + wfs_cut_pars.append("peak_pos") + wfs_cut_pars.append("risetime") + for ii, par in enumerate(wfs_cut_pars): + pspace = np.linspace( + wsize / 2 - peak_lim, wsize / 2 + peak_lim, 2 * peak_lim + ) + if par == "centroid": + llo, lup = sel_dict["ct_ll"], sel_dict["ct_hh"] + hh, bb = np.histogram(centroid, bins=pspace) + elif par == "peak_pos": + llo, lup = sel_dict["pp_ll"], sel_dict["pp_hh"] + hh, bb = np.histogram(peak_pos, bins=pspace) + 
elif par == "risetime": + llo, lup = sel_dict["rt_ll"], sel_dict["rt_hh"] + rt_bins = int((rt_high - rt_low) / dplms_dict["period"]) + rt_space = np.linspace(rt_low, rt_high, rt_bins) + hh, bb = np.histogram(risetime, bins=rt_space) + else: + llo, lup = np.min(cal_par[par]), np.max(cal_par[par]) + hh, bb = np.histogram(cal_par[par], bins=np.linspace(llo, lup, 200)) + ax.flat[ii + 1].plot(bb[1:], hh, ds="steps", label=f"cut on {par}") + ax.flat[ii + 1].axvline( + llo, color="k", linestyle=":", label=f"sel. {llo:.1f} {lup:.1f}" + ) + if par != "centroid": + ax.flat[ii + 1].axvline(lup, color="k", linestyle=":") + ax.flat[ii + 1].set_xlabel(par) + ax.flat[ii + 1].set_yscale("log") + ax.flat[ii + 1].legend(title=f"{lh5_path}", loc="upper right") + roughenergy = dsp_cal["trapTmax"].nda + roughenergy_sel = roughenergy[idxs] + ell, ehh = roughenergy.min(), roughenergy.max() + he, be = np.histogram(roughenergy, bins=np.linspace(ell, ehh, 1000)) + hs, be = np.histogram(roughenergy_sel, bins=np.linspace(ell, ehh, 1000)) + ax.flat[0].plot(be[1:], he, c="b", ds="steps", label="initial") + ax.flat[0].plot(be[1:], hs, c="r", ds="steps", label="selected") + ax.flat[0].set_xlabel("rough energy (ADC)") + ax.flat[0].set_yscale("log") + ax.flat[0].legend(loc="upper right", title=f"{lh5_path}") + plot_dict["dplms"]["wf_sel"] = fig + + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + ax.plot(np.flip(x), "r-", label=f"filter") + ax.axhline(0, color="black", linestyle=":") + ax.legend(loc="upper right", title=f"{lh5_path}") + axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33]) + axin.plot(np.flip(x), "r-") + axin.set_xlim( + dplms_dict["length"] / 2 - dplms_dict["zoom"], + dplms_dict["length"] / 2 + dplms_dict["zoom"], + ) + axin.set_yticklabels("") + ax.indicate_inset_zoom(axin) + + return out_dict, plot_dict + else: + return out_dict + + +def load_data( + raw_file: list[str], + lh5_path: str, + sel_type: str, + peaks: np.array = [], + n_events: int = 5000, + e_lower_lim: float = 1200, + e_upper_lim: float = 2700, + raw_wf_field: str = "waveform", +) -> lgdo.Table: + sto = lh5.LH5Store() + df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") + + if sel_type == "bls": + cuts = np.where(df.daqenergy.values == 0)[0] + idx_list = [] + waveforms = sto.read_object( + f"{lh5_path}/raw/{raw_wf_field}", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) + return tb_data + else: + pulser_props = find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + pulser_e, pulser_err = entry[0], entry[1] + if pulser_err < 10: + pulser_err = 10 + e_cut = (df.daqenergy.values < pulser_e + pulser_err) & ( + df.daqenergy.values > pulser_e - pulser_err + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) + if sel_type == "pul": + cuts = np.where(ids == True)[0] + log.debug(f"{len(cuts)} events found for pulser") + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table( + col_dict={"waveform": waveforms, 
"daqenergy": daqenergy} + ) + return tb_data + else: + # Get events around peak using raw file values + initial_mask = (df.daqenergy.values > 0) & (~ids) + rough_energy = df.daqenergy.values[initial_mask] + initial_idxs = np.where(initial_mask)[0] + + guess_keV = 2620 / np.nanpercentile(rough_energy, 99) + Euc_min = 0 # threshold / guess_keV * 0.6 + Euc_max = 2620 / guess_keV * 1.1 + dEuc = 1 # / guess_keV + hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( + hist, bins, var, peaks + ) + log.debug( + f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}" + ) + e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] + e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] + log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") + mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) + cuts = initial_idxs[mask][:] + log.debug(f"{len(cuts)} events found in energy range") + rough_energy = rough_energy[mask] + rough_energy = rough_energy[:n_events] + rough_energy = rough_energy * roughpars[0] + roughpars[1] + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table( + col_dict={"waveform": waveforms, "daqenergy": daqenergy} + ) + return tb_data, rough_energy + + +def is_valid_centroid( + centroid: np.array, lim: int, size: int, full_size: int +) -> list[bool]: + llim = size / 2 - lim + hlim = full_size - size / 2 + idxs = (centroid > llim) & (centroid < hlim) + return idxs, llim, hlim + + +def is_not_pile_up( + peak_pos: np.array, peak_pos_neg: np.array, thr: int, lim: int, size: int +) -> list[bool]: + bin_edges = np.linspace(size / 2 - lim, size / 2 + lim, 2 * lim) + hist, bin_edges = np.histogram(peak_pos, bins=bin_edges) + + thr = thr * hist.max() / 100 + low_thr_idxs = np.where(hist[: hist.argmax()] < thr)[0] + upp_thr_idxs = np.where(hist[hist.argmax() :] < thr)[0] + + idx_low = low_thr_idxs[-1] if low_thr_idxs.size > 0 else 0 + idx_upp = ( + upp_thr_idxs[0] + hist.argmax() if upp_thr_idxs.size > 0 else len(hist) - 1 + ) + + llow, lupp = bin_edges[idx_low], bin_edges[idx_upp] + + idxs = [] + for n, nn in zip(peak_pos, peak_pos_neg): + condition1 = np.count_nonzero(n > 0) == 1 + condition2 = ( + np.count_nonzero((n > 0) & ((n < llow) | (n > lupp) & (n < size))) == 0 + ) + condition3 = np.count_nonzero(nn > 0) == 0 + idxs.append(condition1 and condition2 and condition3) + return idxs, llow, lupp + + +def is_valid_risetime(risetime: np.array, llim: int, perc: float): + hlim = np.percentile(risetime[~np.isnan(risetime)], perc) + idxs = (risetime >= llim) & (risetime <= hlim) + return idxs, llim, hlim + + +def signal_selection(dsp_cal, dplms_dict, coeff_values): + peak_pos = dsp_cal["peak_pos"].nda + peak_pos_neg = dsp_cal["peak_pos_neg"].nda + centroid = dsp_cal["centroid"].nda + risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda + + rt_low = dplms_dict["rt_low"] + rt_high = dplms_dict["rt_high"] + peak_lim = dplms_dict["peak_lim"] + wsize = dplms_dict["wsize"] + bsize = dplms_dict["bsize"] + + centroid_lim = dplms_dict["centroid_lim"] + if "rt" in coeff_values: + perc = coeff_values["rt"] + else: + perc = dplms_dict["dp_def"]["rt"] + if "pt" in coeff_values: + thr = coeff_values["pt"] + else: + thr = dplms_dict["dp_def"]["rt"] + + idxs_ct, ct_ll, ct_hh = 
is_valid_centroid(centroid, centroid_lim, wsize, bsize) + log.info(f"... {len(peak_pos[idxs_ct,:])} signals after alignment") + + idxs_pp, pp_ll, pp_hh = is_not_pile_up(peak_pos, peak_pos_neg, thr, peak_lim, wsize) + log.info(f"... {len(peak_pos[idxs_pp,:])} signals after pile-up cut") + + idxs_rt, rt_ll, rt_hh = is_valid_risetime(risetime, rt_low, perc) + log.info(f"... {len(peak_pos[idxs_rt,:])} signals after risetime cut") + + idxs = idxs_ct & idxs_pp & idxs_rt + sel_dict = { + "idxs": idxs, + "ct_ll": ct_ll, + "ct_hh": ct_hh, + "pp_ll": pp_ll, + "pp_hh": pp_hh, + "rt_ll": rt_ll, + "rt_hh": rt_hh, + } + return sel_dict + + +def noise_matrix(bls: np.array, length: int) -> np.array: + nev, size = bls.shape + ref = np.mean(bls, axis=0) + offset = np.mean(ref) + bls = bls - offset + nmat = np.matmul(bls.T, bls, dtype=float) / nev + kernel = np.identity(size - length + 1) + nmat = convolve2d(nmat, kernel, boundary="symm", mode="valid") / (size - length + 1) + return nmat + + +def signal_matrices( + wfs: np.array, length: int, decay_const: float, ff: int = 2 +) -> np.array: + nev, size = wfs.shape + lo = size // 2 - 100 + flo = size // 2 - length // 2 + fhi = size // 2 + length // 2 + offsets = np.mean(wfs[:, :lo], axis=1) + wfs = wfs - offsets[:, np.newaxis] + + # Reference signal + ref = np.sum(wfs, axis=0) + ref /= np.max(ref) + rmat = np.outer(ref[flo:fhi], ref[flo:fhi]) + + # Pile-up matrix + if decay_const > 0: + decay = np.exp(-np.arange(length) / decay_const) + else: + decay = np.zeros(length) + pmat = np.outer(decay, decay) + + # Flat top matrix + flo -= ff // 2 + fhi += ff // 2 + wfs = wfs[:, flo:fhi] + fmat = np.matmul(wfs.T, wfs, dtype=float) / nev + m1 = ((1, -1), (-1, 1)) + fmat = convolve2d(fmat, m1, boundary="symm", mode="valid") + if ff > 0: + fmat = convolve2d(fmat, np.identity(ff), boundary="symm", mode="valid") / ff + return ref, rmat, pmat, fmat + + +def filter_synthesis( + ref: np.array, + nmat: np.array, + rmat: np.array, + za: int, + pmat: np.array, + fmat: np.array, + length: int, + size: int, +) -> np.array: + mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat + flo = (size // 2) - (length // 2) + fhi = (size // 2) + (length // 2) + x = np.linalg.solve(mat, ref[flo:fhi]) + y = convolve(ref, np.flip(x), mode="valid") + maxy = np.max(y) + x /= maxy + y /= maxy + refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)] + return x, y, refy diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index b5d59278b..633132dd0 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -861,6 +861,7 @@ def fom_FWHM_fit(tb_in, kwarg_dict): csqr, n_sig, n_sig_err, + _, ) = get_peak_fwhm_with_dt_corr( Energies, alpha, dt, func, gof_func, peak=peak, kev_width=kev_width, kev=True ) @@ -942,6 +943,7 @@ def event_selection( else: final_mask = final_mask | e_cut ids = final_mask + print(f"pulser found: {pulser_props}") log.debug(f"pulser found: {pulser_props}") else: log.debug("no_pulser") @@ -954,16 +956,14 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = threshold / guess_keV * 0.6 + Euc_min = 0 # threshold / guess_keV Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 # / guess_keV + dEuc = 1 / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( - hist, - bins, - var, - 
np.array([238.632, 583.191, 727.330, 860.564, 1620.5, 2103.53, 2614.553]), + hist, bins, var, peaks_keV, n_sigma=3 ) + print(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") masks = [] @@ -1084,11 +1084,11 @@ def event_selection( return final_data, idx_list -def fwhm_slope(x, m0, m1, m2): +def fwhm_slope(x, m0, m1): """ Fit the energy resolution curve """ - return np.sqrt(m0 + m1 * x + m2 * (x**2)) + return np.sqrt(m0 + m1 * x) def interpolate_energy(peak_energies, points, err_points, energy): @@ -1096,7 +1096,7 @@ def interpolate_energy(peak_energies, points, err_points, energy): if len(points[~nan_mask]) < 3: return np.nan, np.nan, np.nan else: - param_guess = [2, 0.001, 0.000001] # + param_guess = [2, 0.001] # param_bounds = (0, [10., 1. ])# try: fit_pars, fit_covs = curve_fit( @@ -1146,6 +1146,11 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_0_est"].nda, dtype="float64") elif ctc_parameter == "rt": dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_01"].nda, dtype="float64") + + if idxs is not None: + Energies = Energies[idxs] + dt = dt[idxs] + if np.isnan(Energies).any() or np.isnan(dt).any(): if np.isnan(Energies).any(): log.debug(f"nan energy values for peak {peak}") @@ -1160,10 +1165,6 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): "n_sig_err": np.nan, } - if idxs is not None: - Energies = Energies[idxs] - dt = dt[idxs] - # Return fwhm of optimal alpha in kev with error try: ( @@ -1216,39 +1217,37 @@ def single_peak_fom(data, kwarg_dict): return out_dict -def new_fom(data, kwarg_dict): +def new_fom(data, kwarg_dict, alpha=None): peaks = kwarg_dict["peaks_keV"] idx_list = kwarg_dict["idx_list"] ctc_param = kwarg_dict["ctc_param"] peak_dicts = kwarg_dict["peak_dicts"] - out_dict = fom_FWHM_with_dt_corr_fit( - data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 - ) - alpha = out_dict["alpha"] + if alpha is None: + out_dict = fom_FWHM_with_dt_corr_fit( + data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 + ) + alpha = out_dict["alpha"] + log.info(alpha) fwhms = [] fwhm_errs = [] n_sig = [] n_sig_err = [] - for i, peak in enumerate(peaks[:-1]): + chisquares = [] + for i, peak in enumerate(peaks): out_peak_dict = fom_FWHM( data, peak_dicts[i], ctc_param, alpha, idxs=idx_list[i], display=0 ) - # n_sig_minimum = peak_dicts[i]["n_sig_minimum"] - # if peak_dict["n_sig"] Date: Tue, 28 Nov 2023 12:53:29 +0000 Subject: [PATCH 040/191] style: pre-commit fixes --- src/pygama/pargen/dplms_ge_dict.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index f6d0902d9..8651ddc2f 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -86,7 +86,7 @@ def dplms_ge_dict( lh5_path, "bls", n_events=dplms_dict["n_baselines"], - raw_wf_field=dplms_dict["raw_wf_field"] + raw_wf_field=dplms_dict["raw_wf_field"], ) dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) @@ -135,7 +135,9 @@ def dplms_ge_dict( n_events=dplms_dict["n_signals"], ) t3 = time.time() - log.info(f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}") + log.info( + f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}" + ) raw_cal = index_data(raw_cal, idx_list[-1]) log.info(f"Produce dsp data for {len(raw_cal)} 
events") @@ -259,7 +261,9 @@ def dplms_ge_dict( res["alpha"], res["chisquare"], ) - log.info(f"FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, evaluated in {time.time()-t_tmp:.1f} s") + log.info( + f"FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, evaluated in {time.time()-t_tmp:.1f} s" + ) grid_dict[i]["fwhm"] = fwhm grid_dict[i]["fwhm_err"] = fwhm_err @@ -301,7 +305,9 @@ def dplms_ge_dict( pt_coeff, ] ): - log.info(f"\nBest case: FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, ctc {alpha}") + log.info( + f"\nBest case: FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, ctc {alpha}" + ) else: log.error("Some values are missing in the best case results") else: From 1192f17fdbcf6a1074ec29596ca95777eb94c593 Mon Sep 17 00:00:00 2001 From: Valerio D'Andrea Date: Tue, 28 Nov 2023 15:41:39 +0100 Subject: [PATCH 041/191] removed dependency on nopt routine --- src/pygama/pargen/dplms_ge_dict.py | 32 ------------------------------ 1 file changed, 32 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 8651ddc2f..71311f4ce 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -36,7 +36,6 @@ fom_FWHM_with_dt_corr_fit, index_data, ) -from pygama.pargen.noise_optimization import calculate_spread log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -217,30 +216,6 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) - t_tmp = time.time() - dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - energies = dsp_opt[ene_par].nda - enc_results = calculate_spread(energies, 10, 90, 1000) - enc, enc_err = enc_results["fom"], enc_results["fom_err"] - log.info( - f"ENC: mean = {energies.mean():.2f} ADC, FOM = {enc:.2f} ± {enc_err:.2f} ADC, evaluated in {time.time()-t_tmp:.1f} s" - ) - grid_dict[i]["enc"] = enc - grid_dict[i]["enc_err"] = enc_err - - if display > 0: - hist, bins, var = get_hist(energies, range=(-20, 20), dx=0.1) - bc = (bins[:-1] + bins[1:]) / 2.0 - ax.plot( - bc, - hist, - ds="steps", - label=f"{ene_par} - ENC = {enc:.3f} ± {enc_err:.3f} ADC", - ) - ax.set_xlabel("energy (ADC)") - ax.set_ylabel("counts") - ax.legend(loc="upper right") - t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -281,8 +256,6 @@ def dplms_ge_dict( min_result = grid_dict[min_idx] best_case_values = {key: min_result[key] for key in min_result.keys()} - enc = best_case_values.get("enc", None) - enc_err = best_case_values.get("enc_err", 0) fwhm = best_case_values.get("fwhm", None) fwhm_err = best_case_values.get("fwhm_err", 0) alpha = best_case_values.get("alpha", 0) @@ -294,8 +267,6 @@ def dplms_ge_dict( if all( v is not None for v in [ - enc, - enc_err, fwhm, fwhm_err, alpha, @@ -366,9 +337,6 @@ def dplms_ge_dict( log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}") if display > 0: - plot_dict["dplms"]["enc_hist"] = fig - plot_dict["dplms"]["enc"] = enc - plot_dict["dplms"]["enc_err"] = enc_err plot_dict["dplms"]["ref"] = ref plot_dict["dplms"]["coefficients"] = x From e71d0db9b616abc4b5a88be677f978dfdbf20c7b Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 16:19:52 +0100 Subject: [PATCH 042/191] loading full raw table --- src/pygama/pargen/dplms_ge_dict.py | 98 +++++++++++------------------- 1 file changed, 35 insertions(+), 63 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 71311f4ce..3ca56cb66 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ 
b/src/pygama/pargen/dplms_ge_dict.py @@ -80,13 +80,7 @@ def dplms_ge_dict( t0 = time.time() log.info(f"\nSelecting baselines") - raw_bls = load_data( - fft_files, - lh5_path, - "bls", - n_events=dplms_dict["n_baselines"], - raw_wf_field=dplms_dict["raw_wf_field"], - ) + raw_bls = load_data(fft_files, lh5_path, "bls", n_events=dplms_dict["n_baselines"]) dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) @@ -216,6 +210,10 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + energies = dsp_opt[ene_par].nda + t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -459,7 +457,6 @@ def load_data( n_events: int = 5000, e_lower_lim: float = 1200, e_upper_lim: float = 2700, - raw_wf_field: str = "waveform", ) -> lgdo.Table: sto = lh5.LH5Store() df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") @@ -467,13 +464,9 @@ def load_data( if sel_type == "bls": cuts = np.where(df.daqenergy.values == 0)[0] idx_list = [] - waveforms = sto.read_object( - f"{lh5_path}/raw/{raw_wf_field}", raw_file, n_rows=n_events, idx=cuts + tb_data = sto.read_object( + f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) return tb_data else: pulser_props = find_pulser_properties(df, energy="daqenergy") @@ -495,55 +488,34 @@ def load_data( else: log.debug("no pulser") ids = np.zeros(len(df.daqenergy.values), dtype=bool) - if sel_type == "pul": - cuts = np.where(ids == True)[0] - log.debug(f"{len(cuts)} events found for pulser") - waveforms = sto.read_object( - f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts - )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = lh5.Table( - col_dict={"waveform": waveforms, "daqenergy": daqenergy} - ) - return tb_data - else: - # Get events around peak using raw file values - initial_mask = (df.daqenergy.values > 0) & (~ids) - rough_energy = df.daqenergy.values[initial_mask] - initial_idxs = np.where(initial_mask)[0] - - guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV * 0.6 - Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 # / guess_keV - hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) - detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks - ) - log.debug( - f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}" - ) - e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] - e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] - log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") - mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) - cuts = initial_idxs[mask][:] - log.debug(f"{len(cuts)} events found in energy range") - rough_energy = rough_energy[mask] - rough_energy = rough_energy[:n_events] - rough_energy = rough_energy * roughpars[0] + roughpars[1] - waveforms = sto.read_object( - f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts - )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = 
lh5.Table( - col_dict={"waveform": waveforms, "daqenergy": daqenergy} - ) - return tb_data, rough_energy + + # Get events around peak using raw file values + initial_mask = (df.daqenergy.values > 0) & (~ids) + rough_energy = df.daqenergy.values[initial_mask] + initial_idxs = np.where(initial_mask)[0] + + guess_keV = 2620 / np.nanpercentile(rough_energy, 99) + Euc_min = 0 # threshold / guess_keV * 0.6 + Euc_max = 2620 / guess_keV * 1.1 + dEuc = 1 # / guess_keV + hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( + hist, bins, var, peaks + ) + log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") + e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] + e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] + log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") + mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) + cuts = initial_idxs[mask][:] + log.debug(f"{len(cuts)} events found in energy range") + rough_energy = rough_energy[mask] + rough_energy = rough_energy[:n_events] + rough_energy = rough_energy * roughpars[0] + roughpars[1] + tb_data = sto.read_object( + f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts + )[0] + return tb_data, rough_energy def is_valid_centroid( From f1f3e23228f52fa60e69f7d5482bd28c7e2d1cec Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 16:23:28 +0100 Subject: [PATCH 043/191] loading full raw table --- src/pygama/pargen/noise_optimization.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index 12695c357..5a0b51f84 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -69,7 +69,6 @@ def noise_optimization( raw_list, lh5_path, n_events=opt_dict["n_events"], - wf_field=opt_dict["wf_field"], ) t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. 
baselines {len(tb_data)}") @@ -291,7 +290,6 @@ def load_data( bls: bool = True, n_events: int = 10000, threshold: int = 200, - wf_field="waveform", ) -> lgdo.Table: sto = lh5.LH5Store() @@ -301,20 +299,7 @@ def load_data( idxs = np.where(energies.nda == 0)[0] else: idxs = np.where(energies.nda > threshold)[0] - - waveforms = sto.read_object( - f"{lh5_path}/raw/{wf_field}", raw_list, n_rows=n_events, idx=idxs - )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_list, n_rows=n_events, idx=idxs - )[0] - baseline = sto.read_object( - f"{lh5_path}/raw/baseline", raw_list, n_rows=n_events, idx=idxs - )[0] - - tb_data = lh5.Table( - col_dict={"waveform": waveforms, "daqenergy": daqenergy, "baseline": baseline} - ) + tb_data = sto.read_object(f"{lh5_path}/raw", raw_list, n_rows=n_events, idx=idxs)[0] return tb_data From 32f843cba257903bbaa342892f8e45e67738ebd8 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 16:45:53 +0100 Subject: [PATCH 044/191] smale change on loading data --- src/pygama/pargen/dplms_ge_dict.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 3ca56cb66..75ee98e04 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -459,10 +459,11 @@ def load_data( e_upper_lim: float = 2700, ) -> lgdo.Table: sto = lh5.LH5Store() - df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") + + daqenergy = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_file)[0].nda if sel_type == "bls": - cuts = np.where(df.daqenergy.values == 0)[0] + cuts = np.where(daqenergy == 0)[0] idx_list = [] tb_data = sto.read_object( f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts @@ -476,8 +477,8 @@ def load_data( pulser_e, pulser_err = entry[0], entry[1] if pulser_err < 10: pulser_err = 10 - e_cut = (df.daqenergy.values < pulser_e + pulser_err) & ( - df.daqenergy.values > pulser_e - pulser_err + e_cut = (daqenergy < pulser_e + pulser_err) & ( + daqenergy > pulser_e - pulser_err ) if final_mask is None: final_mask = e_cut @@ -487,11 +488,11 @@ def load_data( log.debug(f"pulser found: {pulser_props}") else: log.debug("no pulser") - ids = np.zeros(len(df.daqenergy.values), dtype=bool) + ids = np.zeros(len(daqenergy), dtype=bool) # Get events around peak using raw file values - initial_mask = (df.daqenergy.values > 0) & (~ids) - rough_energy = df.daqenergy.values[initial_mask] + initial_mask = (daqenergy > 0) & (~ids) + rough_energy = daqenergy[initial_mask] initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) From 401b91ec140f742703065038bbd1fc9388651003 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 17:51:55 +0100 Subject: [PATCH 045/191] raw table passed to the routine, loading moved out --- src/pygama/pargen/noise_optimization.py | 44 +++++-------------------- 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index 5a0b51f84..8da8e9a51 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -14,7 +14,6 @@ from collections import namedtuple import lgdo -import lgdo.lh5_store as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np @@ -33,11 +32,10 @@ from pygama.pargen.energy_optimisation import index_data log = logging.getLogger(__name__) -sto = lh5.LH5Store() def noise_optimization( - 
raw_list: list[str], + tb_data: lgdo.Table, dsp_proc_chain: dict, par_dsp: dict, opt_dict: dict, @@ -49,8 +47,8 @@ def noise_optimization( This function calculates the optimal filter par. Parameters ---------- - raw_list : str - raw files to run the macro on + tb_data : str + raw table to run the macro on dsp_proc_chain: str Path to minimal dsp config file par_dsp: str @@ -65,16 +63,9 @@ def noise_optimization( """ t0 = time.time() - tb_data = load_data( - raw_list, - lh5_path, - n_events=opt_dict["n_events"], - ) - t1 = time.time() - log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") + log.info(f"Select baselines {len(tb_data)}") if verbose: - print(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") - + print(f"Select baselines {len(tb_data)}") dsp_data = run_one_dsp(tb_data, dsp_proc_chain) cut_dict = generate_cuts(dsp_data, parameters=opt_dict["cut_pars"]) idxs = get_cut_indexes(dsp_data, cut_dict) @@ -124,11 +115,11 @@ def noise_optimization( filter_par = opt_dict_par[ene_par]["filter_par"] par_dsp[lh5_path][dict_str][filter_par] = f"{x}*us" - t2 = time.time() + t1 = time.time() dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) - log.info(f"Time to process dsp data {time.time()-t2:.2f} s") + log.info(f"Time to process dsp data {time.time()-t1:.2f} s") if verbose: - print(f"Time to process dsp data {time.time()-t2:.2f} s") + print(f"Time to process dsp data {time.time()-t1:.2f} s") for ene_par in ene_pars: dict_str = opt_dict_par[ene_par]["dict_str"] @@ -284,25 +275,6 @@ def spl_func(x_val): return res_dict -def load_data( - raw_list: list[str], - lh5_path: str, - bls: bool = True, - n_events: int = 10000, - threshold: int = 200, -) -> lgdo.Table: - sto = lh5.LH5Store() - - energies = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_list)[0] - - if bls: - idxs = np.where(energies.nda == 0)[0] - else: - idxs = np.where(energies.nda > threshold)[0] - tb_data = sto.read_object(f"{lh5_path}/raw", raw_list, n_rows=n_events, idx=idxs)[0] - return tb_data - - def calculate_spread(energies, percentile_low, percentile_high, n_samples): spreads = np.zeros(n_samples) for i in range(n_samples): From e70d1d835438608849312bc5125413aea35d7f0a Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:48:11 +0100 Subject: [PATCH 046/191] moved load data out of pargen routine --- src/pygama/pargen/dplms_ge_dict.py | 130 +++-------------------- src/pygama/pargen/energy_optimisation.py | 13 +-- 2 files changed, 23 insertions(+), 120 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 75ee98e04..67caf4ced 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -34,7 +34,6 @@ event_selection, fom_FWHM, fom_FWHM_with_dt_corr_fit, - index_data, ) log = logging.getLogger(__name__) @@ -43,8 +42,8 @@ def dplms_ge_dict( lh5_path: str, - fft_files: list[str], - cal_files: list[str], + raw_fft: lgdo.Table, + raw_cal: lgdo.Table, dsp_config: dict, par_dsp: dict, par_dsp_lh5: str, @@ -60,10 +59,10 @@ def dplms_ge_dict( ---------- lh5_path: str Name of channel to process, should be name of lh5 group in raw files - fft_files : list[str] - raw files with fft data - cal_files : list[str] - raw files with cal data + fft_files : lgdo.Table + table with fft data + raw_cal : lgdo.Table + table with cal data dsp_config: dict dsp config file par_dsp: dict @@ -80,22 +79,20 @@ def dplms_ge_dict( t0 = time.time() log.info(f"\nSelecting baselines") - raw_bls = 
load_data(fft_files, lh5_path, "bls", n_events=dplms_dict["n_baselines"]) - - dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) - idxs = get_cut_indexes(dsp_bls, cut_dict) + dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path]) + cut_dict = generate_cuts(dsp_fft, parameters=dplms_dict["bls_cut_pars"]) + idxs = get_cut_indexes(dsp_fft, cut_dict) bl_field = dplms_dict["bl_field"] - log.info(f"... {len(dsp_bls[bl_field].values.nda[idxs,:])} baselines after cuts") + log.info(f"... {len(dsp_fft[bl_field].values.nda[idxs,:])} baselines after cuts") - bls = dsp_bls[bl_field].values.nda[idxs, : dplms_dict["bsize"]] + bls = dsp_fft[bl_field].values.nda[idxs, : dplms_dict["bsize"]] bls_par = {} bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()] for par in bls_cut_pars: - bls_par[par] = dsp_bls[par].nda + bls_par[par] = dsp_fft[par].nda t1 = time.time() log.info( - f"total events {len(raw_bls)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" + f"total events {len(raw_fft)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" ) log.info( @@ -111,36 +108,16 @@ def dplms_ge_dict( log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s") log.info("\nSelecting signals") - peaks_keV = np.array(dplms_dict["peaks_keV"]) wsize = dplms_dict["wsize"] wf_field = dplms_dict["wf_field"] + peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - raw_cal, idx_list = event_selection( - cal_files, - f"{lh5_path}/raw", - dsp_config, - par_dsp[lh5_path], - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - cut_parameters=dplms_dict["wfs_cut_pars"], - n_events=dplms_dict["n_signals"], - ) - t3 = time.time() - log.info( - f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}" - ) - - raw_cal = index_data(raw_cal, idx_list[-1]) log.info(f"Produce dsp data for {len(raw_cal)} events") dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) - t4 = time.time() - log.info(f"Time to run dsp production {(t4-t3):.2f} s") + t3 = time.time() + log.info(f"Time to run dsp production {(t3-t2):.2f} s") - # minimal processing chain - with open(dsp_config) as r: - dsp_config = json.load(r) dsp_config["outputs"] = [ene_par, "dt_eff"] # dictionary for peak fitting @@ -155,7 +132,6 @@ def dplms_ge_dict( if display > 0: plot_dict = {} plot_dict["dplms"] = {} - fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") # penalized coefficients dp_coeffs = dplms_dict["dp_coeffs"] @@ -210,10 +186,6 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) - t_tmp = time.time() - dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - energies = dsp_opt[ene_par].nda - t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -449,76 +421,6 @@ def dplms_ge_dict( return out_dict -def load_data( - raw_file: list[str], - lh5_path: str, - sel_type: str, - peaks: np.array = [], - n_events: int = 5000, - e_lower_lim: float = 1200, - e_upper_lim: float = 2700, -) -> lgdo.Table: - sto = lh5.LH5Store() - - daqenergy = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_file)[0].nda - - if sel_type == "bls": - cuts = np.where(daqenergy == 0)[0] - idx_list = [] - tb_data = sto.read_object( - f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts - )[0] - return tb_data - else: - pulser_props = find_pulser_properties(df, 
energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - pulser_e, pulser_err = entry[0], entry[1] - if pulser_err < 10: - pulser_err = 10 - e_cut = (daqenergy < pulser_e + pulser_err) & ( - daqenergy > pulser_e - pulser_err - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = final_mask - log.debug(f"pulser found: {pulser_props}") - else: - log.debug("no pulser") - ids = np.zeros(len(daqenergy), dtype=bool) - - # Get events around peak using raw file values - initial_mask = (daqenergy > 0) & (~ids) - rough_energy = daqenergy[initial_mask] - initial_idxs = np.where(initial_mask)[0] - - guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV * 0.6 - Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 # / guess_keV - hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) - detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks - ) - log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") - e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] - e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] - log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") - mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) - cuts = initial_idxs[mask][:] - log.debug(f"{len(cuts)} events found in energy range") - rough_energy = rough_energy[mask] - rough_energy = rough_energy[:n_events] - rough_energy = rough_energy * roughpars[0] + roughpars[1] - tb_data = sto.read_object( - f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts - )[0] - return tb_data, rough_energy - - def is_valid_centroid( centroid: np.array, lim: int, size: int, full_size: int ) -> list[bool]: diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 633132dd0..79aef0d25 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -943,7 +943,6 @@ def event_selection( else: final_mask = final_mask | e_cut ids = final_mask - print(f"pulser found: {pulser_props}") log.debug(f"pulser found: {pulser_props}") else: log.debug("no_pulser") @@ -956,14 +955,13 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV + Euc_min = threshold / guess_keV * 0.6 Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 / guess_keV + dEuc = 1 # / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( hist, bins, var, peaks_keV, n_sigma=3 ) - print(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") masks = [] @@ -1084,11 +1082,14 @@ def event_selection( return final_data, idx_list -def fwhm_slope(x, m0, m1): +def fwhm_slope(x, m0, m1, m2=None): """ Fit the energy resolution curve """ - return np.sqrt(m0 + m1 * x) + if m2 is None: + return np.sqrt(m0 + m1 * x) + else: + return np.sqrt(m0 + m1 * x + m2 * (x**2)) def interpolate_energy(peak_energies, points, err_points, energy): From cf1e1ab0ae3ba32533a4fb24ee3d8f5db8dffc46 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:53:40 +0100 Subject: [PATCH 047/191] small changes --- src/pygama/pargen/energy_optimisation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py
index 79aef0d25..ffd231f97 100644
--- a/src/pygama/pargen/energy_optimisation.py
+++ b/src/pygama/pargen/energy_optimisation.py
@@ -960,7 +960,10 @@ def event_selection( dEuc = 1 # / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=3 + hist, + bins, + var, + np.array([238.632, 583.191, 727.330, 860.564, 1620.5, 2103.53, 2614.553]), ) log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}")

From 4771a8ba45f42a8fdfa1318f504f028ea9f9764c Mon Sep 17 00:00:00 2001
From: valerioda
Date: Wed, 29 Nov 2023 11:15:48 +0100
Subject: [PATCH 048/191] removed wf_field dependency

--- src/pygama/pargen/noise_optimization.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py
index 8da8e9a51..4e9ff229e 100644
--- a/src/pygama/pargen/noise_optimization.py
+++ b/src/pygama/pargen/noise_optimization.py
@@ -81,10 +81,9 @@ def noise_optimization( res_dict = {} if display > 0: - # fft dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) psd = np.mean(dsp_data["wf_psd"].values.nda, axis=0) - sample_us = float(tb_data[opt_dict["wf_field"]].dt.nda[0]) / 1000 + sample_us = float(dsp_data["wf_presum"].dt.nda[0]) / 1000 freq = np.linspace(0, (1 / sample_us) / 2, len(psd)) fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") ax.plot(freq, psd)

From 4f208ba081c271760602edccae57e25c222a1a8f Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Wed, 29 Nov 2023 15:46:35 +0100
Subject: [PATCH 049/191] added ability to fix ecal pars

--- src/pygama/pargen/energy_cal.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py
index a55df8a92..4de39688c 100644
--- a/src/pygama/pargen/energy_cal.py
+++ b/src/pygama/pargen/energy_cal.py
@@ -842,7 +842,7 @@ def poly_wrapper(x, *pars): return pgf.poly(x, pars) -def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): +def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0, fixed=None): """Find best fit of poly(E) = mus +/- sqrt(mu_vars) Compare to hpge_fit_E_cal_func which fits for E = poly(mu) @@ -857,7 +857,9 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): deg : int degree for energy scale fit. deg=0 corresponds to a simple scaling mu = scale * E.
Otherwise deg follows the definition in np.polyfit - + fixed : dict + dict where keys are index of polyfit pars to fix and vals are the value + to fix at, can be None to fix at guess value Returns ------- pars : array @@ -873,7 +875,16 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): else: poly_pars = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars)) c = cost.LeastSquares(Es_keV, mus, np.sqrt(mu_vars), poly_wrapper) + if fixed is not None: + for idx, val in fixed.items(): + if val is True or val is None: + pass + else: + poly_pars[idx] = val m = Minuit(c, *poly_pars) + if fixed is not None: + for idx in list(fixed): + m.fixed[idx]=True m.simplex() m.migrad() m.hesse() @@ -883,7 +894,7 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): return pars, errs, cov -def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): +def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0, fixed=None): """Find best fit of E = poly(mus +/- sqrt(mu_vars)) This is an inversion of hpge_fit_E_scale. E uncertainties are computed from mu_vars / dmu/dE where mu = poly(E) is the @@ -903,6 +914,9 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): deg : int degree for energy scale fit. deg=0 corresponds to a simple scaling mu = scale * E. Otherwise deg follows the definition in np.polyfit + fixed : dict + dict where keys are index of polyfit pars to fix and vals are the value + to fix at, can be None to fix at guess value Returns ------- @@ -923,8 +937,17 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): dmudEs += E_scale_pars[n] * mus ** (len(E_scale_pars) - 2 - n) E_weights = dmudEs * mu_vars poly_pars = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights) + if fixed is not None: + for idx, val in fixed.items(): + if val is True or val is None: + pass + else: + poly_pars[idx] = val c = cost.LeastSquares(mus, Es_keV, E_weights, poly_wrapper) m = Minuit(c, *poly_pars) + if fixed is not None: + for idx in list(fixed): + m.fixed[idx]=True m.simplex() m.migrad() m.hesse() From 0e1c96821831bd805065bbb42e0addd514579241 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 29 Nov 2023 15:47:08 +0100 Subject: [PATCH 050/191] changes to high_stats_fitting to add calibration par tuning --- src/pygama/pargen/ecal_th.py | 273 +++++++++++++++++++++++++---------- 1 file changed, 194 insertions(+), 79 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 7d21cd91d..81556d9d8 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -191,9 +191,6 @@ def __init__( self.simplex = simplex self.tail_weight = tail_weight - self.output_dict = {} - self.hit_dict = {} - def fit_energy_res(self): fitted_peaks = self.results["fitted_keV"] fwhms = self.results["pk_fwhms"][:, 0] @@ -558,8 +555,8 @@ def calibrate_parameter(self, data): log.info(f"Calibration pars are {self.pars}") if ~np.isnan(self.pars).all(): self.fit_energy_res() - self.hit_dict[self.cal_energy_param] = self.gen_pars_dict() - data[f"{self.energy_param}_cal"] = pgf.poly(data[self.energy_param], self.pars) + self.hit_dict = {self.cal_energy_param:self.gen_pars_dict()} + data[self.cal_energy_param] = pgf.poly(data[self.energy_param], self.pars) def fill_plot_dict(self, data, plot_dict={}): for key, item in self.plot_options.items(): @@ -654,7 +651,6 @@ class high_stats_fitting(calibrate_parameter): pgf.radford_pdf, pgf.gauss_step_pdf, pgf.gauss_step_pdf, - pgf.gauss_step_pdf, pgf.radford_pdf, pgf.gauss_step_pdf, pgf.gauss_step_pdf, @@ -677,9 +673,15 
@@ def __init__( plot_options={}, simplex=False, tail_weight=20, + cal_energy_param = None, + deg=2, + fixed = None, ): self.energy_param = energy_param - self.cal_energy_param = energy_param + if cal_energy_param is None: + self.cal_energy_param = energy_param + else: + self.cal_energy_param = cal_energy_param self.selection_string = selection_string self.threshold = threshold self.p_val = p_val @@ -691,6 +693,8 @@ def __init__( self.output_dict = {} self.pars = [1, 0] self.tail_weight = tail_weight + self.fixed = fixed + self.deg=deg def get_results_dict(self, data): if self.results: @@ -733,74 +737,107 @@ def get_results_dict(self, data): else: return {} - def fit_peaks(self, data): - log.debug(f"Fitting {self.energy_param}") - try: - n_bins = [ - int((self.range_keV[i][1] + self.range_keV[i][0]) / self.binning[i]) - for i in range(len(self.glines)) - ] - ( - pk_pars, - pk_errors, - pk_covs, - pk_binws, - pk_ranges, - pk_pvals, - valid_pks, - pk_funcs, - ) = cal.hpge_fit_E_peaks( - data.query(self.selection_string)[self.energy_param], - self.glines, - self.range_keV, - n_bins=n_bins, - funcs=self.funcs, - method="unbinned", - gof_funcs=self.gof_funcs, - n_events=None, - allowed_p_val=self.p_val, - tail_weight=20, - ) - for idx, peak in enumerate(self.glines): - self.funcs[idx] = pk_funcs[idx] - if pk_funcs[idx] == pgf.extended_radford_pdf: - self.gof_funcs[idx] = pgf.radford_pdf - else: - self.gof_funcs[idx] = pgf.gauss_step_pdf + def run_fit(self, data): + hist, bins, var = pgh.get_hist(data.query(self.selection_string)[self.energy_param], + range=(np.amin(self.glines)*0.8,np.amax(self.glines)*1.1), dx=0.5 ) + (got_peak_locations, + got_peak_energies, + roughpars) = hpge_get_E_peaks( + hist, + bins, + var, + np.array([1,0]), + n_sigma=3, + peaks_keV=glines) + + found_mask = np.in1d(self.glines,got_peak_energies) + self.results["got_peaks_locs"] = got_peak_locations + self.results["got_peaks_keV"] = got_peak_energies + + log.info(f"{len(got_peak_energies)} peaks obtained:") + log.info(f"\t Energy | Position ") + for i, (Li, Ei) in enumerate(zip(got_peak_locations, got_peak_energies)): + log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) + + + self.glines = np.array(self.glines)[found_mask].tolist() + self.range_keV = np.array(self.range_keV)[found_mask].tolist() + self.binning = np.array(self.binning)[found_mask].tolist() + self.funcs = np.array(self.funcs)[found_mask].tolist() + self.gof_funcs = np.array(self.gof_funcs)[found_mask].tolist() + + n_bins = [ + int((self.range_keV[i][1] + self.range_keV[i][0]) / self.binning[i]) + for i in range(len(self.glines)) + ] + ( + pk_pars, + pk_errors, + pk_covs, + pk_binws, + pk_ranges, + pk_pvals, + valid_pks, + pk_funcs, + ) = cal.hpge_fit_E_peaks( + data.query(self.selection_string)[self.energy_param], + self.glines, + self.range_keV, + n_bins=n_bins, + funcs=self.funcs, + method="unbinned", + gof_funcs=self.gof_funcs, + n_events=None, + allowed_p_val=self.p_val, + tail_weight=20, + ) + for idx, peak in enumerate(self.glines): + self.funcs[idx] = pk_funcs[idx] + if pk_funcs[idx] == pgf.extended_radford_pdf: + self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf - self.results["got_peaks_keV"] = self.glines - self.results["pk_pars"] = pk_pars - self.results["pk_errors"] = pk_errors - self.results["pk_covs"] = pk_covs - self.results["pk_binws"] = pk_binws - self.results["pk_ranges"] = pk_ranges - self.results["pk_pvals"] = pk_pvals + self.results["got_peaks_keV"] = self.glines + 
self.results["pk_pars"] = pk_pars + self.results["pk_errors"] = pk_errors + self.results["pk_covs"] = pk_covs + self.results["pk_binws"] = pk_binws + self.results["pk_ranges"] = pk_ranges + self.results["pk_pvals"] = pk_pvals - for i, pk in enumerate(self.results["got_peaks_keV"]): - try: - if self.results["pk_pars"][i]["n_sig"] < 10: - valid_pks[i] = False - elif ( - 2 * self.results["pk_errors"][i]["n_sig"] - > self.results["pk_pars"][i]["n_sig"] - ): - valid_pks[i] = False - except: - pass + for i, pk in enumerate(self.results["got_peaks_keV"]): + try: + if self.results["pk_pars"][i]["n_sig"] < 10: + valid_pks[i] = False + elif ( + 2 * self.results["pk_errors"][i]["n_sig"] + > self.results["pk_pars"][i]["n_sig"] + ): + valid_pks[i] = False + except: + pass - self.results["pk_validities"] = valid_pks + self.results["pk_validities"] = valid_pks - # Drop failed fits - fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[ - valid_pks - ] - pk_pars = np.asarray(pk_pars, dtype=object)[valid_pks] # ragged - pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] - pk_covs = np.asarray(pk_covs, dtype=object)[valid_pks] - pk_binws = np.asarray(pk_binws)[valid_pks] - pk_ranges = np.asarray(pk_ranges)[valid_pks] - pk_pvals = np.asarray(pk_pvals)[valid_pks] - pk_funcs = np.asarray(pk_funcs)[valid_pks] + # Drop failed fits + self.results["fitted_keV"] = np.asarray(self.glines)[ + valid_pks + ] + + + def fit_peaks(self, data): + log.debug(f"Fitting {self.energy_param}") + try: + self.run_fit(data) + + valid_pks = self.results["pk_validities"] + fitted_peaks_keV = self.results["fitted_keV"] + pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[valid_pks] # ragged + pk_errors = np.asarray(self.results["pk_errors"], dtype=object)[valid_pks] + pk_covs = np.asarray(self.results["pk_covs"], dtype=object)[valid_pks] + pk_funcs = np.asarray(self.funcs)[valid_pks] + log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( @@ -810,13 +847,13 @@ def fit_peaks(self, data): parsi = np.asarray(parsi, dtype=float) errorsi = np.asarray(errorsi, dtype=float) covsi = np.asarray(covsi, dtype=float) - # parsigsi = np.sqrt(covsi.diagonal()) + log.info(f"\tEnergy: {str(Ei)}") log.info(f"\t\tParameter | Value +/- Sigma ") for vari, pari, errorsi in zip(varnames, parsi, errorsi): log.info( - f'\t\t{str(vari).ljust(10)} | {("%4.2f" % pari).rjust(8)} +/- {("%4.2f" % errorsi).ljust(8)}' - ) + f'\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}' + ) cal_fwhms = [ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) @@ -831,20 +868,98 @@ def fit_peaks(self, data): ) log.info(f"{len(cal_fwhms)} FWHMs found:") - log.info(f"\t Energy | FWHM ") + log.info(f"\t{'Energy':>10}{'| FWHM':>9}") for i, (Ei, fwhm, fwhme) in enumerate( zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs) ): log.info( - f"\t{i}".ljust(4) - + str(Ei).ljust(9) - + f"| {fwhm:.2f}+-{fwhme:.2f} keV".ljust(5) + f"\t{str(i):<4}{str(Ei):<9}| {f'{fwhm:.2f}+-{fwhme:.2f}':<10} keV" ) self.fit_energy_res() log.debug(f"high stats fitting successful") except: self.results = {} log.debug(f"high stats fitting failed") + + def update_calibration(self, data): + log.debug(f"Calibrating {self.energy_param}") + self.run_fit(data) + + valid_pks = self.results["pk_validities"] + fitted_peaks_keV = self.results["fitted_keV"] + pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[valid_pks] # ragged + pk_errors = np.asarray(self.results["pk_errors"], dtype=object)[valid_pks] + pk_covs = 
np.asarray(self.results["pk_covs"], dtype=object)[valid_pks] + pk_funcs = np.asarray(self.funcs)[valid_pks] + + + log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") + for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( + zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs, pk_funcs) + ): + varnames = func_i.__code__.co_varnames[1 : len(pk_pars[-1]) + 1] + parsi = np.asarray(parsi, dtype=float) + errorsi = np.asarray(errorsi, dtype=float) + covsi = np.asarray(covsi, dtype=float) + # parsigsi = np.sqrt(covsi.diagonal()) + log.info(f"\tEnergy: {str(Ei)}") + log.info(f"\t\tParameter | Value +/- Sigma ") + for vari, pari, errorsi in zip(varnames, parsi, errorsi): + log.info( + f'\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}' + ) + # Drop failed fits + + mus = [ + pgf.get_mu_func(func_i, pars_i, errors=errors_i) + for func_i, pars_i, errors_i in zip(pk_funcs, pk_pars, pk_errors) + ] + mus, mu_vars = zip(*mus) + mus = np.asarray(mus) + mu_errs = np.asarray(mu_vars) + mu_vars = np.asarray(mu_vars) ** 2 + + try: + pars, errs, cov = hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=self.deg, + fixed = self.fixed) + except ValueError: + log.error("Failed to fit enough peaks to get accurate calibration") + return None, None, None, results + + # Invert the E scale fit to get a calibration function + self.pars, self.errs, self.cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=self.deg, + fixed = self.fixed) + + uncal_fwhms = [ + pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) + for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs) + ] + uncal_fwhms, uncal_fwhm_errs = zip(*uncal_fwhms) + uncal_fwhms = np.asarray(uncal_fwhms) + uncal_fwhm_errs = np.asarray(uncal_fwhm_errs) + derco = np.polyder(np.poly1d(pars)).coefficients + der = [pgf.poly(Ei, derco) for Ei in fitted_peaks_keV] + + cal_fwhms = uncal_fwhms * der + cal_fwhms_errs = uncal_fwhm_errs * der + self.results["pk_fwhms"] = np.asarray( + [(u * d, e * d) for u, e, d in zip(uncal_fwhms, uncal_fwhm_errs, der)] + ) + + log.info(f"{len(cal_fwhms)} FWHMs found:") + log.info(f"\t{'Energy':>10}{'| FWHM':>9}") + for i, (Ei, fwhm, fwhme) in enumerate( + zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs) + ): + log.info( + f"\t{str(i):<4}{str(Ei):<9}| {f'{fwhm:.2f}+-{fwhme:.2f}':<10} keV" + ) + self.fit_energy_res() + if self.cal_energy_param == self.energy_param: + log.info("Warning dataframe energy will be overwritten as cal energy and input energy have same name") + self.hit_dict = {self.cal_energy_param: self.gen_pars_dict()} + data[self.cal_energy_param] = pgf.poly(data[self.energy_param], self.pars) + log.debug(f"high stats calibration successful") def get_peak_labels( @@ -1227,7 +1342,7 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fig, (ax1, ax2) = plt.subplots( 2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]} ) - ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", lw=0, c="black") + ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=0, c="black") fwhm_slope_bins = np.arange(erange[0], erange[1], 10) From ffe3a76fae113299b446172bc23570b25d096fdc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 29 Nov 2023 16:21:52 +0100 Subject: [PATCH 051/191] upped limit on number of events in peak for inclusion in calibration --- src/pygama/pargen/energy_cal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 4de39688c..952207a9d 100644 --- 
a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -701,7 +701,7 @@ def hpge_fit_E_peaks( mode_guess, tail_weight=tail_weight, ) - if pars_i["n_sig"] < 20: + if pars_i["n_sig"] < 100: valid_fit = False else: par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) From 5437be5c22863cb9dda3926dd8bc63bb55a93029 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 29 Nov 2023 16:22:14 +0100 Subject: [PATCH 052/191] bugfixes --- src/pygama/pargen/ecal_th.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 81556d9d8..d7e5be508 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -742,13 +742,13 @@ def run_fit(self, data): range=(np.amin(self.glines)*0.8,np.amax(self.glines)*1.1), dx=0.5 ) (got_peak_locations, got_peak_energies, - roughpars) = hpge_get_E_peaks( + roughpars) = cal.hpge_get_E_peaks( hist, bins, var, np.array([1,0]), n_sigma=3, - peaks_keV=glines) + peaks_keV=self.glines) found_mask = np.in1d(self.glines,got_peak_energies) self.results["got_peaks_locs"] = got_peak_locations @@ -920,14 +920,14 @@ def update_calibration(self, data): mu_vars = np.asarray(mu_vars) ** 2 try: - pars, errs, cov = hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=self.deg, + pars, errs, cov = cal.hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=self.deg, fixed = self.fixed) except ValueError: log.error("Failed to fit enough peaks to get accurate calibration") return None, None, None, results # Invert the E scale fit to get a calibration function - self.pars, self.errs, self.cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=self.deg, + self.pars, self.errs, self.cov = cal.hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=self.deg, fixed = self.fixed) uncal_fwhms = [ @@ -1342,7 +1342,7 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fig, (ax1, ax2) = plt.subplots( 2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]} ) - ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=0, c="black") + ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=" ", c="black") fwhm_slope_bins = np.arange(erange[0], erange[1], 10) From 3ecd027f9950f67cf6f3ff72a2408503559463d2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 11:21:47 +0000 Subject: [PATCH 053/191] Bump pypa/gh-action-pypi-publish from 1.8.10 to 1.8.11 Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.10 to 1.8.11. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.10...v1.8.11) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/distribution.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/distribution.yml b/.github/workflows/distribution.yml index 17749536a..add54cba1 100644 --- a/.github/workflows/distribution.yml +++ b/.github/workflows/distribution.yml @@ -38,6 +38,6 @@ jobs: name: artifact path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.10 + - uses: pypa/gh-action-pypi-publish@v1.8.11 with: password: ${{ secrets.pypi_password }} From 4bada36aa579b6d62d74ade09d9a8b47e6a4a1f0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 3 Dec 2023 22:16:28 +0100 Subject: [PATCH 054/191] bugfix for loading --- src/pygama/pargen/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 61f9dd6cf..8888e0d43 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -76,14 +76,14 @@ def load_data( file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("run_timestamp") for param in params: - if param not in df: + if param not in file_df: file_df[param] = lh5.load_nda(tfiles, [param], lh5_path)[param] if threshold is not None: mask = file_df[cal_energy_param] > threshold - file_df.drop(np.where(mask)[0], inplace=True) + file_df.drop(np.where(~mask)[0], inplace=True) else: - mask = np.zeros(len(file_df), dtype=bool) - masks = np.append(masks, ~mask) + mask = np.ones(len(file_df), dtype=bool) + masks = np.append(masks, mask) df.append(file_df) all_files += tfiles From 41acd59dc1500b6e1750cf4ab816156c21c4e6c9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 3 Dec 2023 22:16:56 +0100 Subject: [PATCH 055/191] moved dt param update out of aoe to workflow --- src/pygama/pargen/AoE_cal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 4db0cfbaa..bbe0ee171 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1320,7 +1320,6 @@ def __init__( self.dt_cut = dt_cut self.dep_acc = dep_acc if self.dt_cut is not None: - self.update_cal_dicts(dt_cut["cut"]) self.dt_cut_param = dt_cut["out_param"] self.fit_selection = f"{self.selection_string} & {self.dt_cut_param}" self.dt_cut_hard = dt_cut["hard"] From b1f512bcbe85b5ed89aa35c3c9d2398e7645a56c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 3 Dec 2023 22:23:20 +0100 Subject: [PATCH 056/191] added ecal tuning for high stats fitting, fixes for plots to add errors and make sure only valid peaks plotted --- src/pygama/pargen/ecal_th.py | 211 ++++++++++++++++++----------------- 1 file changed, 109 insertions(+), 102 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index d7e5be508..2d13c56b6 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -1254,14 +1254,18 @@ def bin_stability(ecal_class, data, time_slice=180, energy_range=[2585, 2660]): def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 2700]): - pk_pars = ecal_class.results["pk_pars"] + valid_fits = ecal_class.results["pk_validities"] + pk_pars = ecal_class.results["pk_pars"][valid_fits] + pk_errs = ecal_class.results["pk_errors"][valid_fits] fitted_peaks = ecal_class.results["got_peaks_keV"] - pk_errs = ecal_class.results["pk_errors"] fitted_gof_funcs = [] for i, peak in enumerate(ecal_class.glines): if peak in fitted_peaks: fitted_gof_funcs.append(ecal_class.gof_funcs[i]) + + 
fitted_gof_funcs=np.array(fitted_gof_funcs)[valid_fits] + fitted_peaks = np.array(fitted_peaks)[valid_fits] mus = [ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan @@ -1339,118 +1343,121 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fit_fwhms = np.delete(fwhms, [indexes]) fit_dfwhms = np.delete(dfwhms, [indexes]) + + fig, (ax1, ax2) = plt.subplots( 2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]} ) - ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=" ", c="black") - - fwhm_slope_bins = np.arange(erange[0], erange[1], 10) - - qbb_line_vx = [2039.0, 2039.0] - qbb_line_vy = [ - 0.9 - * np.nanmin( - fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]) - ), - np.nanmax( + if len(np.where((~np.isnan(fit_fwhms))& (~np.isnan(fit_dfwhms)))[0])>0: + ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=" ", c="black") + + fwhm_slope_bins = np.arange(erange[0], erange[1], 10) + + qbb_line_vx = [2039.0, 2039.0] + qbb_line_vy = [ + 0.9 + * np.nanmin( + fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]) + ), + np.nanmax( + [ + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], + ] + ), + ] + qbb_line_hx = [erange[0], 2039.0] + + ax1.plot( + fwhm_slope_bins, + fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), + lw=1, + c="g", + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV', + ) + ax1.plot( + fwhm_slope_bins, + fwhm_quadratic.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_quadratic["parameters"] + ), + lw=1, + c="b", + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err_in_keV"]:1.2f} keV', + ) + ax1.plot( + qbb_line_hx, [ ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ], + lw=1, + c="r", + ls="--", + ) + ax1.plot( + qbb_line_hx, + [ ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], - ] - ), - ] - qbb_line_hx = [erange[0], 2039.0] - - ax1.plot( - fwhm_slope_bins, - fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), - lw=1, - c="g", - label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV', - ) - ax1.plot( - fwhm_slope_bins, - fwhm_quadratic.func( - fwhm_slope_bins, *ecal_class.fwhm_fit_quadratic["parameters"] - ), - lw=1, - c="b", - label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err_in_keV"]:1.2f} keV', - ) - ax1.plot( - qbb_line_hx, - [ - ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], - ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], - ], - lw=1, - c="r", - ls="--", - ) - ax1.plot( - qbb_line_hx, - [ - ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], - ], - lw=1, - c="r", - ls="--", - ) - ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--") + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], + ], + lw=1, + c="r", + ls="--", + ) + ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--") - ax1.legend(loc="upper left", frameon=False) - if np.isnan(ecal_class.fwhm_fit_linear["parameters"]).all(): - [ - 0.9 * np.nanmin(fit_fwhms), - 1.1 * np.nanmax(fit_fwhms), - ] - else: - ax1.set_ylim( + ax1.legend(loc="upper left", 
frameon=False) + if np.isnan(ecal_class.fwhm_fit_linear["parameters"]).all(): [ - 0.9 - * np.nanmin( - fwhm_linear.func( - fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] - ) - ), - 1.1 - * np.nanmax( - fwhm_linear.func( - fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] - ) - ), + 0.9 * np.nanmin(fit_fwhms), + 1.1 * np.nanmax(fit_fwhms), ] + else: + ax1.set_ylim( + [ + 0.9 + * np.nanmin( + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) + ), + 1.1 + * np.nanmax( + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) + ), + ] + ) + ax1.set_xlim(erange) + ax1.set_ylabel("FWHM energy resolution (keV)") + ax2.plot( + fwhm_peaks, + ( + fit_fwhms + - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"]) + ) + / fit_dfwhms, + lw=0, + marker="x", + c="g", ) - ax1.set_xlim(erange) - ax1.set_ylabel("FWHM energy resolution (keV)") - ax2.plot( - fwhm_peaks, - ( - fit_fwhms - - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"]) - ) - / fit_dfwhms, - lw=0, - marker="x", - c="g", - ) - ax2.plot( - fwhm_peaks, - ( - fit_fwhms - - fwhm_quadratic.func( - fwhm_peaks, *ecal_class.fwhm_fit_quadratic["parameters"] + ax2.plot( + fwhm_peaks, + ( + fit_fwhms + - fwhm_quadratic.func( + fwhm_peaks, *ecal_class.fwhm_fit_quadratic["parameters"] + ) ) + / fit_dfwhms, + lw=0, + marker="x", + c="b", ) - / fit_dfwhms, - lw=0, - marker="x", - c="b", - ) - ax2.plot(erange, [0, 0], color="black", lw=0.5) - ax2.set_xlabel("Energy (keV)") - ax2.set_ylabel("Normalised Residuals") + ax2.plot(erange, [0, 0], color="black", lw=0.5) + ax2.set_xlabel("Energy (keV)") + ax2.set_ylabel("Normalised Residuals") plt.tight_layout() plt.close() return fig From b6f95761d04142dd11a4b2a6315e138697d718de Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 3 Dec 2023 21:25:24 +0000 Subject: [PATCH 057/191] style: pre-commit fixes --- src/pygama/pargen/ecal_th.py | 105 ++++++++++++++++---------------- src/pygama/pargen/energy_cal.py | 8 +-- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 2d13c56b6..d37b74f77 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -555,7 +555,7 @@ def calibrate_parameter(self, data): log.info(f"Calibration pars are {self.pars}") if ~np.isnan(self.pars).all(): self.fit_energy_res() - self.hit_dict = {self.cal_energy_param:self.gen_pars_dict()} + self.hit_dict = {self.cal_energy_param: self.gen_pars_dict()} data[self.cal_energy_param] = pgf.poly(data[self.energy_param], self.pars) def fill_plot_dict(self, data, plot_dict={}): @@ -673,9 +673,9 @@ def __init__( plot_options={}, simplex=False, tail_weight=20, - cal_energy_param = None, + cal_energy_param=None, deg=2, - fixed = None, + fixed=None, ): self.energy_param = energy_param if cal_energy_param is None: @@ -694,7 +694,7 @@ def __init__( self.pars = [1, 0] self.tail_weight = tail_weight self.fixed = fixed - self.deg=deg + self.deg = deg def get_results_dict(self, data): if self.results: @@ -738,28 +738,24 @@ def get_results_dict(self, data): return {} def run_fit(self, data): - hist, bins, var = pgh.get_hist(data.query(self.selection_string)[self.energy_param], - range=(np.amin(self.glines)*0.8,np.amax(self.glines)*1.1), dx=0.5 ) - (got_peak_locations, - got_peak_energies, - roughpars) = cal.hpge_get_E_peaks( - hist, - bins, - var, - np.array([1,0]), - n_sigma=3, - 
peaks_keV=self.glines) - - found_mask = np.in1d(self.glines,got_peak_energies) + hist, bins, var = pgh.get_hist( + data.query(self.selection_string)[self.energy_param], + range=(np.amin(self.glines) * 0.8, np.amax(self.glines) * 1.1), + dx=0.5, + ) + (got_peak_locations, got_peak_energies, roughpars) = cal.hpge_get_E_peaks( + hist, bins, var, np.array([1, 0]), n_sigma=3, peaks_keV=self.glines + ) + + found_mask = np.in1d(self.glines, got_peak_energies) self.results["got_peaks_locs"] = got_peak_locations self.results["got_peaks_keV"] = got_peak_energies - + log.info(f"{len(got_peak_energies)} peaks obtained:") log.info(f"\t Energy | Position ") for i, (Li, Ei) in enumerate(zip(got_peak_locations, got_peak_energies)): log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) - self.glines = np.array(self.glines)[found_mask].tolist() self.range_keV = np.array(self.range_keV)[found_mask].tolist() self.binning = np.array(self.binning)[found_mask].tolist() @@ -821,23 +817,21 @@ def run_fit(self, data): self.results["pk_validities"] = valid_pks # Drop failed fits - self.results["fitted_keV"] = np.asarray(self.glines)[ - valid_pks - ] - + self.results["fitted_keV"] = np.asarray(self.glines)[valid_pks] def fit_peaks(self, data): log.debug(f"Fitting {self.energy_param}") try: self.run_fit(data) - + valid_pks = self.results["pk_validities"] fitted_peaks_keV = self.results["fitted_keV"] - pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[valid_pks] # ragged + pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[ + valid_pks + ] # ragged pk_errors = np.asarray(self.results["pk_errors"], dtype=object)[valid_pks] pk_covs = np.asarray(self.results["pk_covs"], dtype=object)[valid_pks] pk_funcs = np.asarray(self.funcs)[valid_pks] - log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( @@ -847,13 +841,11 @@ def fit_peaks(self, data): parsi = np.asarray(parsi, dtype=float) errorsi = np.asarray(errorsi, dtype=float) covsi = np.asarray(covsi, dtype=float) - + log.info(f"\tEnergy: {str(Ei)}") log.info(f"\t\tParameter | Value +/- Sigma ") for vari, pari, errorsi in zip(varnames, parsi, errorsi): - log.info( - f'\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}' - ) + log.info(f"\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}") cal_fwhms = [ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) @@ -880,18 +872,17 @@ def fit_peaks(self, data): except: self.results = {} log.debug(f"high stats fitting failed") - + def update_calibration(self, data): log.debug(f"Calibrating {self.energy_param}") self.run_fit(data) - + valid_pks = self.results["pk_validities"] fitted_peaks_keV = self.results["fitted_keV"] pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[valid_pks] # ragged pk_errors = np.asarray(self.results["pk_errors"], dtype=object)[valid_pks] pk_covs = np.asarray(self.results["pk_covs"], dtype=object)[valid_pks] pk_funcs = np.asarray(self.funcs)[valid_pks] - log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( @@ -905,9 +896,7 @@ def update_calibration(self, data): log.info(f"\tEnergy: {str(Ei)}") log.info(f"\t\tParameter | Value +/- Sigma ") for vari, pari, errorsi in zip(varnames, parsi, errorsi): - log.info( - f'\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}' - ) + log.info(f"\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}") # Drop failed fits mus = [ @@ -920,16 +909,18 @@ def update_calibration(self, data): mu_vars = np.asarray(mu_vars) ** 2 
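# [illustrative aside, not part of the diff] The `fixed` mapping introduced in
# PATCH 049 pins selected polynomial coefficients before the Minuit minimisation:
# keys are np.polyfit-style indices (highest degree first), values are the pin
# value, or None/True to keep the polyfit guess. A minimal sketch with made-up
# numbers (mus_toy, vars_toy, peaks_toy are hypothetical, not from the patch):
#
#     mus_toy = np.array([3054.1, 7458.7, 33419.3])       # uncalibrated peak positions
#     vars_toy = np.array([0.04, 0.09, 0.25])             # their variances
#     peaks_toy = np.array([238.632, 583.191, 2614.553])  # literature energies in keV
#     pars, errs, cov = cal.hpge_fit_E_scale(
#         mus_toy, vars_toy, peaks_toy, deg=2, fixed={2: 0}  # hold the constant term at 0
#     )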
try: - pars, errs, cov = cal.hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=self.deg, - fixed = self.fixed) + pars, errs, cov = cal.hpge_fit_E_scale( + mus, mu_vars, fitted_peaks_keV, deg=self.deg, fixed=self.fixed + ) except ValueError: log.error("Failed to fit enough peaks to get accurate calibration") return None, None, None, results # Invert the E scale fit to get a calibration function - self.pars, self.errs, self.cov = cal.hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=self.deg, - fixed = self.fixed) - + self.pars, self.errs, self.cov = cal.hpge_fit_E_cal_func( + mus, mu_vars, fitted_peaks_keV, pars, deg=self.deg, fixed=self.fixed + ) + uncal_fwhms = [ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs) @@ -951,12 +942,12 @@ def update_calibration(self, data): for i, (Ei, fwhm, fwhme) in enumerate( zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs) ): - log.info( - f"\t{str(i):<4}{str(Ei):<9}| {f'{fwhm:.2f}+-{fwhme:.2f}':<10} keV" - ) + log.info(f"\t{str(i):<4}{str(Ei):<9}| {f'{fwhm:.2f}+-{fwhme:.2f}':<10} keV") self.fit_energy_res() if self.cal_energy_param == self.energy_param: - log.info("Warning dataframe energy will be overwritten as cal energy and input energy have same name") + log.info( + "Warning dataframe energy will be overwritten as cal energy and input energy have same name" + ) self.hit_dict = {self.cal_energy_param: self.gen_pars_dict()} data[self.cal_energy_param] = pgf.poly(data[self.energy_param], self.pars) log.debug(f"high stats calibration successful") @@ -1263,8 +1254,8 @@ def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 27 for i, peak in enumerate(ecal_class.glines): if peak in fitted_peaks: fitted_gof_funcs.append(ecal_class.gof_funcs[i]) - - fitted_gof_funcs=np.array(fitted_gof_funcs)[valid_fits] + + fitted_gof_funcs = np.array(fitted_gof_funcs)[valid_fits] fitted_peaks = np.array(fitted_peaks)[valid_fits] mus = [ @@ -1343,13 +1334,13 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fit_fwhms = np.delete(fwhms, [indexes]) fit_dfwhms = np.delete(dfwhms, [indexes]) - - fig, (ax1, ax2) = plt.subplots( 2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]} ) - if len(np.where((~np.isnan(fit_fwhms))& (~np.isnan(fit_dfwhms)))[0])>0: - ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=" ", c="black") + if len(np.where((~np.isnan(fit_fwhms)) & (~np.isnan(fit_dfwhms)))[0]) > 0: + ax1.errorbar( + fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=" ", c="black" + ) fwhm_slope_bins = np.arange(erange[0], erange[1], 10) @@ -1357,7 +1348,9 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz qbb_line_vy = [ 0.9 * np.nanmin( - fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]) + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) ), np.nanmax( [ @@ -1370,7 +1363,9 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ax1.plot( fwhm_slope_bins, - fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ), lw=1, c="g", label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV', @@ -1435,7 +1430,9 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fwhm_peaks, ( fit_fwhms - - 
fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"]) + - fwhm_linear.func( + fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"] + ) ) / fit_dfwhms, lw=0, diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 952207a9d..a9de8e6d3 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -858,7 +858,7 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0, fixed=None): degree for energy scale fit. deg=0 corresponds to a simple scaling mu = scale * E. Otherwise deg follows the definition in np.polyfit fixed : dict - dict where keys are index of polyfit pars to fix and vals are the value + dict where keys are index of polyfit pars to fix and vals are the value to fix at, can be None to fix at guess value Returns ------- @@ -884,7 +884,7 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0, fixed=None): m = Minuit(c, *poly_pars) if fixed is not None: for idx in list(fixed): - m.fixed[idx]=True + m.fixed[idx] = True m.simplex() m.migrad() m.hesse() @@ -915,7 +915,7 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0, fixed=None): degree for energy scale fit. deg=0 corresponds to a simple scaling mu = scale * E. Otherwise deg follows the definition in np.polyfit fixed : dict - dict where keys are index of polyfit pars to fix and vals are the value + dict where keys are index of polyfit pars to fix and vals are the value to fix at, can be None to fix at guess value Returns @@ -947,7 +947,7 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0, fixed=None): m = Minuit(c, *poly_pars) if fixed is not None: for idx in list(fixed): - m.fixed[idx]=True + m.fixed[idx] = True m.simplex() m.migrad() m.hesse() From 14868696367eb43ab7c3afb9add9f720997a9065 Mon Sep 17 00:00:00 2001 From: valerioda Date: Mon, 4 Dec 2023 15:11:46 +0100 Subject: [PATCH 058/191] moved cuts to dataflow script --- src/pygama/pargen/noise_optimization.py | 32 +++++++++---------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py index 4e9ff229e..96c2ce65b 100644 --- a/src/pygama/pargen/noise_optimization.py +++ b/src/pygama/pargen/noise_optimization.py @@ -15,6 +15,8 @@ import lgdo import matplotlib as mpl + +mpl.use("agg") import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -63,16 +65,6 @@ def noise_optimization( """ t0 = time.time() - log.info(f"Select baselines {len(tb_data)}") - if verbose: - print(f"Select baselines {len(tb_data)}") - dsp_data = run_one_dsp(tb_data, dsp_proc_chain) - cut_dict = generate_cuts(dsp_data, parameters=opt_dict["cut_pars"]) - idxs = get_cut_indexes(dsp_data, cut_dict) - tb_data = index_data(tb_data, idxs) - log.info(f"... {len(tb_data)} baselines after cuts") - if verbose: - print(f"... 
{len(tb_data)} baselines after cuts") samples = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step"]) samples_val = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step_val"]) @@ -81,7 +73,7 @@ def noise_optimization( res_dict = {} if display > 0: - dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) + dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp) psd = np.mean(dsp_data["wf_psd"].values.nda, axis=0) sample_us = float(dsp_data["wf_presum"].dt.nda[0]) / 1000 freq = np.linspace(0, (1 / sample_us) / 2, len(psd)) @@ -93,11 +85,8 @@ def noise_optimization( ax.set_ylabel(f"power spectral density") plot_dict = {} - plot_dict["nopt"] = {} - plot_dict["nopt"]["fft"] = {} - plot_dict["nopt"]["fft"]["frequency"] = freq - plot_dict["nopt"]["fft"]["psd"] = psd - plot_dict["nopt"]["fft"]["fig"] = fig + plot_dict["nopt"] = {"fft": {"frequency": freq, "psd": psd, "fig": fig}} + plt.close() result_dict = {} ene_pars = [par for par in opt_dict_par.keys()] @@ -112,10 +101,13 @@ def noise_optimization( for ene_par in ene_pars: dict_str = opt_dict_par[ene_par]["dict_str"] filter_par = opt_dict_par[ene_par]["filter_par"] - par_dsp[lh5_path][dict_str][filter_par] = f"{x}*us" + if dict_str in par_dsp: + par_dsp[dict_str].update({filter_par: f"{x}*us"}) + else: + par_dsp[dict_str] = {filter_par: f"{x}*us"} t1 = time.time() - dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp[lh5_path]) + dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp) log.info(f"Time to process dsp data {time.time()-t1:.2f} s") if verbose: print(f"Time to process dsp data {time.time()-t1:.2f} s") @@ -158,9 +150,6 @@ def noise_optimization( [result_dict[dict_str][x]["fom_err"] for x in result_dict[dict_str].keys()] ) - print(ene_par, sample_list) - print(ene_par, fom_list) - guess_par = sample_list[np.nanargmin(fom_list)] if verbose: print(f"guess par: {guess_par:.2f} us") @@ -199,6 +188,7 @@ def spl_func(x_val): par_dict_res["best_par_err"] = best_par_err par_dict_res["best_val"] = best_val + filter_par = opt_dict_par[ene_par]["filter_par"] res_dict[dict_str] = { filter_par: f"{best_par:.2f}*us", f"{filter_par}_err": f"{best_par_err:.2f}*us", From d339aa8c8209bea1ef5added27a21d5cb9561525 Mon Sep 17 00:00:00 2001 From: Erin Engelhardt <51338203+erin717@users.noreply.github.com> Date: Tue, 5 Dec 2023 11:30:38 -0500 Subject: [PATCH 059/191] changed 'timestamps' to 'run_timestamps' --- src/pygama/pargen/lq_cal.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py index 35ad12a5d..c2156eb6a 100644 --- a/src/pygama/pargen/lq_cal.py +++ b/src/pygama/pargen/lq_cal.py @@ -272,23 +272,23 @@ def update_cal_dicts(self, update_dict): def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): """ Calculates the average LQ value for DEP events for each specified run - timestamp. Applies a time normalization based on the average LQ value - in the DEP across all timestamps. + run_timestamp. Applies a time normalization based on the average LQ value + in the DEP across all run_timestamps. 
""" log.info("Starting LQ time correction") self.timecorr_df = pd.DataFrame( - columns=["timestamp", "mean", "mean_err", "res", "res_err"] + columns=["run_timestamp", "mean", "mean_err", "res", "res_err"] ) try: - if "timestamp" in df: - tstamps = sorted(np.unique(df["timestamp"])) + if "run_timestamp" in df: + tstamps = sorted(np.unique(df["run_timestamp"])) means = [] errors = [] reses = [] res_errs = [] final_tstamps = [] - for tstamp, time_df in df.groupby("timestamp", sort=True): + for tstamp, time_df in df.groupby("run_timestamp", sort=True): try: pars, errs, _, _ = binned_lq_fit( time_df.query(f"{self.selection_string}"), @@ -304,7 +304,7 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): pd.DataFrame( [ { - "timestamp": tstamp, + "run_timestamp": tstamp, "mean": pars["mu"], "mean_err": errs["mu"], "res": pars["sigma"] / pars["mu"], @@ -325,7 +325,7 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): pd.DataFrame( [ { - "timestamp": tstamp, + "run_timestamp": tstamp, "mean": np.nan, "mean_err": np.nan, "res": np.nan, @@ -335,7 +335,7 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): ), ] ) - self.timecorr_df.set_index("timestamp", inplace=True) + self.timecorr_df.set_index("run_timestamp", inplace=True) time_dict = fit_time_means( np.array(self.timecorr_df.index), np.array(self.timecorr_df["mean"]), @@ -343,7 +343,7 @@ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0): ) df[output_name] = df[lq_param] / np.array( - [time_dict[tstamp] for tstamp in df["timestamp"]] + [time_dict[tstamp] for tstamp in df["run_timestamp"]] ) self.update_cal_dicts( { From de891dfb4ccc1aac945d3c17a8dca6c60f07441e Mon Sep 17 00:00:00 2001 From: Erin Engelhardt <51338203+erin717@users.noreply.github.com> Date: Tue, 5 Dec 2023 11:38:45 -0500 Subject: [PATCH 060/191] Removed height_ratios from cut_fit plot --- src/pygama/pargen/lq_cal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py index c2156eb6a..aa3c15806 100644 --- a/src/pygama/pargen/lq_cal.py +++ b/src/pygama/pargen/lq_cal.py @@ -711,7 +711,7 @@ def plot_lq_cut_fit(lq_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - fig, (ax1, ax2) = plt.subplots(2, 1, height_ratios=(2, 1)) + fig, (ax1, ax2) = plt.subplots(2, 1) try: hist, bins = lq_class.fit_hist From daa915fe771898df39871eb61a767abaf9df20af Mon Sep 17 00:00:00 2001 From: rosannadeckert <90900013+rosannadeckert@users.noreply.github.com> Date: Tue, 5 Dec 2023 20:58:54 +0100 Subject: [PATCH 061/191] Add support for bit compounds definitions in `build_hit()` config file (#531) see #531 --------- Co-authored-by: Rosanna Deckert --- src/pygama/hit/build_hit.py | 24 +++++++++- .../hit/configs/aggregations-hit-config.json | 28 +++++++++++ tests/hit/test_build_hit.py | 47 +++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 tests/hit/configs/aggregations-hit-config.json diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index e531fa872..5cc34202f 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -9,7 +9,7 @@ from collections import OrderedDict import numpy as np -from lgdo import LH5Iterator, LH5Store, ls +from lgdo import Array, LH5Iterator, LH5Store, ls log = logging.getLogger(__name__) @@ -131,6 +131,28 @@ def build_hit( outtbl_obj = 
tbl_obj.eval(cfg["operations"]) + # make high level flags + if "aggregations" in cfg: + for high_lvl_flag, flags in cfg["aggregations"].items(): + flags_list = list(flags.values()) + n_flags = len(flags_list) + if n_flags <= 8: + flag_dtype = np.uint8 + elif n_flags <= 16: + flag_dtype = np.uint16 + elif n_flags <= 32: + flag_dtype = np.uint32 + else: + flag_dtype = np.uint64 + + df_flags = outtbl_obj.get_dataframe(flags_list) + flag_values = df_flags.values.astype(flag_dtype) + + multiplier = 2 ** np.arange(n_flags, dtype=flag_values.dtype) + flag_out = np.dot(flag_values, multiplier) + + outtbl_obj.add_field(high_lvl_flag, Array(flag_out)) + # remove or add columns according to "outputs" in the configuration # dictionary if "outputs" in cfg: diff --git a/tests/hit/configs/aggregations-hit-config.json b/tests/hit/configs/aggregations-hit-config.json new file mode 100644 index 000000000..57237ce80 --- /dev/null +++ b/tests/hit/configs/aggregations-hit-config.json @@ -0,0 +1,28 @@ +{ + "outputs": ["is_valid_rt", "is_valid_t0", "is_valid_tmax", "aggr1", "aggr2"], + "operations": { + "is_valid_rt": { + "expression": "((tp_90-tp_10)>96) & ((tp_50-tp_10)>=16)", + "parameters": {} + }, + "is_valid_t0": { + "expression": "(tp_0_est>47000) & (tp_0_est<55000)", + "parameters": {} + }, + "is_valid_tmax": { + "expression": "(tp_max>47000) & (tp_max<120000)", + "parameters": {} + } + }, + "aggregations": { + "aggr1": { + "bit0": "is_valid_rt", + "bit1": "is_valid_t0", + "bit2": "is_valid_tmax" + }, + "aggr2": { + "bit0": "is_valid_t0", + "bit1": "is_valid_tmax" + } + } +} diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py index e497a1742..924928da6 100644 --- a/tests/hit/test_build_hit.py +++ b/tests/hit/test_build_hit.py @@ -98,6 +98,53 @@ def test_outputs_specification(dsp_test_file, tmptestdir): assert list(obj.keys()) == ["calE", "AoE", "A_max"] +def test_aggregation_outputs(dsp_test_file, tmptestdir): + outfile = f"{tmptestdir}/LDQTA_r117_20200110T105115Z_cal_geds_hit.lh5" + + build_hit( + dsp_test_file, + outfile=outfile, + hit_config=f"{config_dir}/aggregations-hit-config.json", + wo_mode="overwrite", + ) + + sto = LH5Store() + obj, _ = sto.read_object("/geds/hit", outfile) + assert list(obj.keys()) == [ + "is_valid_rt", + "is_valid_t0", + "is_valid_tmax", + "aggr1", + "aggr2", + ] + + df = store.load_dfs( + outfile, + ["is_valid_rt", "is_valid_t0", "is_valid_tmax", "aggr1", "aggr2"], + "geds/hit/", + ) + + # aggr1 consists of 3 bits --> max number can be 7, aggr2 consists of 2 bits so max number can be 3 + assert not (df["aggr1"] > 7).any() + assert not (df["aggr2"] > 3).any() + + def get_bit(x, n): + """bit numbering from right to left, starting with bit 0""" + return x & (1 << n) != 0 + + df["bit0_check"] = df.apply(lambda row: get_bit(row["aggr1"], 0), axis=1) + are_identical = df["bit0_check"].equals(df.is_valid_rt) + assert are_identical + + df["bit1_check"] = df.apply(lambda row: get_bit(row["aggr1"], 1), axis=1) + are_identical = df["bit1_check"].equals(df.is_valid_t0) + assert are_identical + + df["bit2_check"] = df.apply(lambda row: get_bit(row["aggr1"], 2), axis=1) + are_identical = df["bit2_check"].equals(df.is_valid_tmax) + assert are_identical + + def test_build_hit_spms_basic(dsp_test_file_spm, tmptestdir): out_file = f"{tmptestdir}/L200-comm-20211130-phy-spms_hit.lh5" build_hit( From 41e6f614a184d899dc1c2113f612f1ecfaec8c01 Mon Sep 17 00:00:00 2001 From: Danielle Schaper Date: Thu, 7 Dec 2023 17:00:30 -0700 Subject: [PATCH 062/191] Update setup.cfg --- 
setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 58bc7bbef..6660e83eb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ install_requires = dspeed>=1.1 h5py>=3.2 iminuit - legend-daq2lh5>=1.0 + legend-daq2lh5>=1.1.0 legend-pydataobj>=1.3 matplotlib numba!=0.53.*,!=0.54.*,!=0.57 From d2680d835872ba104919a2ce5ca44b17235cf66c Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 8 Dec 2023 10:10:59 +0100 Subject: [PATCH 063/191] [ci] bump actions/checkout version --- .github/workflows/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 92ae646bb..a20a33294 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,7 +24,7 @@ jobs: os: [ubuntu-latest, macOS-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: @@ -41,7 +41,7 @@ jobs: name: Calculate and upload test coverage runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - uses: actions/setup-python@v2 @@ -63,7 +63,7 @@ jobs: name: Build documentation runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 - uses: actions/setup-python@v2 From 784353506db9dda57c2c378a6fa76407ee5697e1 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 8 Dec 2023 11:01:07 +0100 Subject: [PATCH 064/191] [ci] remove ignore section in dependabot.yml --- .github/dependabot.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 368d295a8..f9ecf576e 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,5 +5,3 @@ updates: directory: "/" schedule: interval: "monthly" - ignore: - - dependency-name: "actions/*" From 668f371124d045cde762d4116de4fd4f5d2e7c52 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Dec 2023 10:01:26 +0000 Subject: [PATCH 065/191] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot]
---
 .github/workflows/distribution.yml | 2 +-
 .github/workflows/main.yml         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/distribution.yml b/.github/workflows/distribution.yml
index add54cba1..8a702f38e 100644
--- a/.github/workflows/distribution.yml
+++ b/.github/workflows/distribution.yml
@@ -13,7 +13,7 @@ jobs:
   dist:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a20a33294..74647dc14 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -24,7 +24,7 @@ jobs:
         os: [ubuntu-latest, macOS-latest]

     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
@@ -41,7 +41,7 @@ jobs:
     name: Calculate and upload test coverage
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - uses: actions/setup-python@v2
@@ -63,7 +63,7 @@ jobs:
     name: Build documentation
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v2

From 1bc14cfd91b273e7260344282c385ba356d045bd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 8 Dec 2023 10:01:29 +0000
Subject: [PATCH 066/191] Bump actions/setup-python from 2 to 5

Bumps [actions/setup-python](https://github.com/actions/setup-python) from 2 to 5.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v2...v5)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]
---
 .github/workflows/main.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a20a33294..fc2331eb0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -26,7 +26,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
@@ -44,7 +44,7 @@ jobs:
       - uses: actions/checkout@v3
         with:
           fetch-depth: 2
-      - uses: actions/setup-python@v2
+      - uses: actions/setup-python@v5
         with:
           python-version: '3.10'
@@ -66,7 +66,7 @@ jobs:
       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
-      - uses: actions/setup-python@v2
+      - uses: actions/setup-python@v5
         with:
           python-version: '3.10'
       - name: Setup build environment

From f54ee05303c342f8f590441861fe7347d4308b0b Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 8 Dec 2023 16:50:19 +0100
Subject: [PATCH 067/191] [docs] update developer's guide

---
 docs/source/developer.rst | 195 ++++++------------------------------
 1 file changed, 37 insertions(+), 158 deletions(-)

diff --git a/docs/source/developer.rst b/docs/source/developer.rst
index 5d3c85dcf..c27d05def 100644
--- a/docs/source/developer.rst
+++ b/docs/source/developer.rst
@@ -1,14 +1,23 @@
 Developer's guide
 =================

+.. note::
+
+   The https://learn.scientific-python.org webpages are an extremely valuable
+   learning resource for Python software developers. 
The reader is referred to
+   that for any detail not covered in the following guide.
+
 The following rules and conventions have been established for the package
 development and are enforced throughout the entire code base. Merge requests
 that do not comply with the following directives will be rejected.

 To start developing :mod:`pygama`, fork the remote repository to your personal
-GitHub account (see `About Forks `_).
+GitHub account (see `About Forks
+`_).
 If you have not set up your ssh keys on the computer you will be working on,
-please follow `GitHub's instructions `_. Once you have your own fork, you can clone it via
+please follow `GitHub's instructions
+`_.
+Once you have your own fork, you can clone it via
 (replace "yourusername" with your GitHub username):

 .. code-block:: console
@@ -21,7 +30,20 @@ dependencies and can be installed via pip by running:

 .. code-block:: console

     $ cd pygama
-    $ pip install '.[all]'  # single quotes are not needed on bash
+    $ pip install -e '.[all]'  # single quotes are not needed on bash
+
+.. important::
+
+    Pip's ``--editable | -e`` flag lets you install the package in "developer
+    mode", meaning that any change to the source code will be directly
+    propagated to the installed package and importable in scripts.
+
+.. tip::
+
+    It is strongly recommended to work inside a virtual environment, which
+    guarantees reproducibility and isolation. For more details, see
+    `learn.scientific-python.org
+    `_.

 Code style
 ----------

 * All functions and methods (arguments and return types) must be
   `type-annotated `_. Type annotations for variables like class attributes are
   also highly appreciated.
-  Do not forget to
-
-  .. code-block:: python
-
-    from __future__ import annotations
-
-  at the top of a module implementation.
 * Messaging to the user is managed through the :mod:`logging` module. Do not
   add :func:`print` statements. To make a logging object available in a
   module, add this:

   .. code-block:: python

     import logging

     log = logging.getLogger(__name__)

   at the top. In general, try to keep the number of :func:`logging.debug` calls
   low and use informative messages. :func:`logging.info` calls should be
   reserved for messages from high-level routines (like
-  :func:`pygama.dsp.build_dsp`). Good code is never too verbose.
+  :func:`pygama.dsp.build_dsp`) and very sporadic. Good code is never too
+  verbose.
 * If an error condition leading to undefined behavior occurs, raise an
   exception. Try to find the most suitable among the `built-in exceptions
   `_, otherwise ``raise
   RuntimeError("message")``.

 The pre-commit tool is able to identify common style problems and
 automatically fix them, wherever possible. Configured hooks are listed in the
 ``.pre-commit-config.yaml`` file at the project root folder. They are run
 remotely on the GitHub repository through the `pre-commit bot
-`_, but can also be run locally before submitting a
-pull request (recommended):
+`_, but should also be run locally before submitting a
+pull request:

 .. code-block:: console

    $ cd pygama
    $ pip install '.[test]'
    $ pre-commit run --all-files # analyse the source code and fix it wherever possible
-   $ pre-commit install # install a Git pre-commit hook (optional but recommended)
+   $ pre-commit install # install a Git pre-commit hook (strongly recommended)

-For a more comprehensive guide, check out the `Scikit-HEP documentation about
-code style `_.
+For a more comprehensive guide, check out the `learn.scientific-python.org
+documentation about code style
+`_.
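
For illustration, a minimal sketch of a module following the conventions
above; the ``rebin`` function is invented for this example and is not part of
the pygama API:

.. code-block:: python

    import logging

    import numpy as np

    log = logging.getLogger(__name__)


    def rebin(hist: np.ndarray, factor: int) -> np.ndarray:
        """Rebin a one-dimensional histogram by an integer factor."""
        # illustrative example only, not a real pygama function
        if len(hist) % factor != 0:
            raise ValueError(f"factor {factor} does not divide {len(hist)} bins")

        log.debug("rebinning %d bins by a factor of %d", len(hist), factor)
        return hist.reshape(-1, factor).sum(axis=1)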
Testing ------- @@ -82,26 +99,9 @@ Testing * The :mod:`pygama` test suite is available below ``tests/``. We use `pytest `_ to run tests and analyze their output. As a starting point to learn how to write good tests, reading of `the - Scikit-HEP Intro to testing `_ is - recommended. Refer to `pytest's how-to guides - `_ for a complete - overview. -* :mod:`pygama` tests belong to three categories: - - :unit tests: Should ensure the correct behaviour of each function - independently, possibly without relying on other :mod:`pygama` methods. - The existence of these micro-tests makes it possible to promptly identify - and fix the source of a bug. An example of this are tests for each single - DSP processor - - :integration tests: Should ensure that independent parts of the code base - work well together and are integrated in a cohesive framework. An example - of this is testing whether :func:`moduleA.process_obj` is able to - correctly handle :class:`moduleB.DataObj` - - :functional tests: High-level tests of realistic applications. An example is - testing whether the processing of a real or synthetic data sample yields - consistent output parameters + relevant learn.scientific-python.org webpage + `_ is + recommended. * Unit tests are automatically run for every push event and pull request to the remote Git repository on a remote server (currently handled by GitHub @@ -125,127 +125,6 @@ Testing $ pytest --cov=pygama -Testing Numba-Wrapped Functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When using Numba to vectorize Python functions, the Python version of the function -does not, by default, get directly tested, but the Numba version instead. In -this case, we need to unwrap the Numba function and test the pure Python version. -With various processors in :mod:`pygama.dsp.processors`, this means that testing -and triggering the code coverage requires this unwrapping. - -Within the testing suite, we use the :func:`@pytest.fixture()` -decorator to include a helper function called ``compare_numba_vs_python`` that -can be used in any test. This function runs both the Numba and pure Python versions -of a function, asserts that they are equal up to floating precision, and returns the -output value. - -As an example, we show a snippet from the test for -:func:`pygama.dsp.processors.fixed_time_pickoff`, a processor which uses the -:func:`@numba.guvectorize()` decorator. - -.. code-block:: python - - def test_fixed_time_pickoff(compare_numba_vs_python): - """Testing function for the fixed_time_pickoff processor.""" - - len_wf = 20 - - # test for nan if w_in has a nan - w_in = np.ones(len_wf) - w_in[4] = np.nan - assert np.isnan(compare_numba_vs_python(fixed_time_pickoff, w_in, 1, ord("i"))) - -In the assertion that the output is what we expect, we use -``compare_numba_vs_python(fixed_time_pickoff, w_in, 1, ord("i"))`` in place of -``fixed_time_pickoff(w_in, 1, ord("i"))``. In general, the replacement to make is -``func(*inputs)`` becomes ``compare_numba_vs_python(func, *inputs)``. - -Note, that in cases of testing for the raising of errors, it is recommended -to instead run the function twice: once with the Numba version, and once using the -:func:`inspect.unwrap` function. We again show a snippet from the test for -:func:`pygama.dsp.processors.fixed_time_pickoff` below. We include the various -required imports in the snippet for verbosity. - -.. 
code-block:: python
-
-    import inspect
-
-    import numpy as np
-    import pytest
-
-    from pygama.dsp.errors import DSPFatal
-    from pygama.dsp.processors import fixed_time_pickoff
-
-    def test_fixed_time_pickoff(compare_numba_vs_python):
-        "skipping parts of function..."
-        # test for DSPFatal errors being raised
-        # noninteger t_in with integer interpolation
-        with pytest.raises(DSPFatal):
-            w_in = np.ones(len_wf)
-            fixed_time_pickoff(w_in, 1.5, ord("i"))
-
-        with pytest.raises(DSPFatal):
-            a_out = np.empty(len_wf)
-            inspect.unwrap(fixed_time_pickoff)(w_in, 1.5, ord("i"), a_out)
-
-In this case, the general idea is to use :func:`pytest.raises` twice, once with
-``func(*inputs)``, and again with ``inspect.unwrap(func)(*inputs)``.
-
-Testing Factory Functions that Return Numba-Wrapped Functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-As in the previous section, we also have processors that are first initialized
-with a factory function, which then returns a callable Numba-wrapped function.
-In this case, there is a slightly different way of testing the function to ensure
-full code coverage when using ``compare_numba_vs_python``, as the function
-signature is generally different.
-
-As an example, we show a snippet from the test for
-:func:`pygama.dsp.processors.dwt.discrete_wavelet_transform`, a processor which uses
-a factory function to return a function wrapped by the
-:func:`@numba.guvectorize()` decorator.
-
-.. code-block:: python
-
-    import numpy as np
-    import pytest
-
-    from pygama.dsp.errors import DSPFatal
-    from pygama.dsp.processors import discrete_wavelet_transform
-
-    def test_discrete_wavelet_transform(compare_numba_vs_python):
-        """Testing function for the discrete_wavelet_transform processor."""
-
-        # set up values to use for each test case
-        len_wf_in = 16
-        wave_type = 'haar'
-        level = 2
-        len_wf_out = 4
-
-        # ensure the DSPFatal is raised for a negative level
-        with pytest.raises(DSPFatal):
-            discrete_wavelet_transform(wave_type, -1)
-
-        # ensure that a valid input gives the expected output
-        w_in = np.ones(len_wf_in)
-        w_out = np.empty(len_wf_out)
-        w_out_expected = np.ones(len_wf_out) * 2**(level / 2)
-
-        dwt_func = discrete_wavelet_transform(wave_type, level)
-        assert np.allclose(
-            compare_numba_vs_python(dwt_func, w_in, w_out),
-            w_out_expected,
-        )
-        ## rest of test function is truncated in this example
-
-In this case, the error is raised outside of the Numba-wrapped function, and
-we only need to test for the error once. For the comparison of the calculated
-values to expectation, we must initialize the output array and pass it to the
-list of inputs that should be used in the comparison. This is different than
-the previous section, where we are instead now updating the outputted values
-in place.
-
 Documentation
 -------------

@@ -267,7 +146,7 @@ following:
   other) must be provided as separate pages in ``docs/source/`` and linked in
   the table of contents.
 * Jupyter notebooks should be added to the main Git repository below
-  ``tutorials/``.
+  ``docs/source/notebooks``.

 * Before submitting a pull request, contributors are required to build the
   documentation locally and resolve any warnings or errors.
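
The "build the documentation locally" requirement at the end of the updated
guide can be exercised with a plain Sphinx invocation. A sketch, assuming a
``docs`` optional dependency group in ``setup.cfg`` and the ``docs/source``
layout referenced throughout this series:

.. code-block:: console

   $ cd pygama
   $ pip install -e '.[docs]'                 # "docs" extra assumed to exist
   $ sphinx-build -W docs/source docs/build   # -W turns Sphinx warnings into errors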
From 6f0b4309def3650136ffb177cc09a0f4fd341832 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 8 Dec 2023 16:57:07 +0100 Subject: [PATCH 068/191] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index e182a9c06..87c3199b1 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,8 @@ - generating and selecting high-level event data for further analysis Check out the [online documentation](https://pygama.readthedocs.io). + +## Related repositories +- [legend-exp/legend-pydataobj](https://github.com/legend-exp/legend-pydataobj) → LEGEND Python Data Objects +- [legend-exp/legend-daq2lh5](https://github.com/legend-exp/legend-daq2lh5) → Convert digitizer data to LEGEND HDF5 +- [legend-exp/dspeed](https://github.com/legend-exp/dspeed) → Fast Digital Signal Processing for particle detector signals in Python From 73395e4970355e09dac4db72309ffe8a1975bcd7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 11:23:38 +0000 Subject: [PATCH 069/191] Bump actions/upload-artifact from 3 to 4 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/distribution.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/distribution.yml b/.github/workflows/distribution.yml index 8a702f38e..57ab31577 100644 --- a/.github/workflows/distribution.yml +++ b/.github/workflows/distribution.yml @@ -20,7 +20,7 @@ jobs: - name: Build SDist and wheel run: pipx run build - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: dist/* From 38c64ef5db47f0731f9630fa6755b6b39bf6984a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 11:23:41 +0000 Subject: [PATCH 070/191] Bump actions/download-artifact from 3 to 4 Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/distribution.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/distribution.yml b/.github/workflows/distribution.yml index 8a702f38e..9800e2cbf 100644 --- a/.github/workflows/distribution.yml +++ b/.github/workflows/distribution.yml @@ -33,7 +33,7 @@ jobs: if: github.event_name == 'release' && github.event.action == 'published' steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: artifact path: dist From 95f5b1eccc006c998f0d58804557aea4fa1a410b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 16:58:19 +0000 Subject: [PATCH 071/191] chore: update pre-commit hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0) - [github.com/asottile/setup-cfg-fmt: v2.4.0 → v2.5.0](https://github.com/asottile/setup-cfg-fmt/compare/v2.4.0...v2.5.0) - [github.com/PyCQA/isort: 5.12.0 → 5.13.2](https://github.com/PyCQA/isort/compare/5.12.0...5.13.2) - [github.com/asottile/pyupgrade: v3.13.0 → v3.15.0](https://github.com/asottile/pyupgrade/compare/v3.13.0...v3.15.0) - [github.com/psf/black: 23.9.1 → 23.12.1](https://github.com/psf/black/compare/23.9.1...23.12.1) - [github.com/pre-commit/mirrors-mypy: v1.5.1 → v1.8.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.5.1...v1.8.0) - [github.com/hadialqattan/pycln: v2.2.2 → v2.4.0](https://github.com/hadialqattan/pycln/compare/v2.2.2...v2.4.0) - [github.com/codespell-project/codespell: v2.2.5 → v2.2.6](https://github.com/codespell-project/codespell/compare/v2.2.5...v2.2.6) - [github.com/pre-commit/mirrors-prettier: v3.0.3 → v4.0.0-alpha.8](https://github.com/pre-commit/mirrors-prettier/compare/v3.0.3...v4.0.0-alpha.8) --- .pre-commit-config.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e8238d0d9..f0bc4718d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: exclude: ^(attic|tutorials|src/pygama/math|src/pygama/flow/datagroup.py) repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: "v4.4.0" + rev: "v4.5.0" hooks: - id: check-added-large-files - id: check-case-conflict @@ -26,35 +26,35 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/setup-cfg-fmt - rev: "v2.4.0" + rev: "v2.5.0" hooks: - id: setup-cfg-fmt - repo: https://github.com/PyCQA/isort - rev: "5.12.0" + rev: "5.13.2" hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: "v3.13.0" + rev: "v3.15.0" hooks: - id: pyupgrade args: ["--py38-plus"] - repo: https://github.com/psf/black - rev: "23.9.1" + rev: "23.12.1" hooks: - id: black-jupyter - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.5.1" + rev: "v1.8.0" hooks: - id: mypy files: src stages: [manual] - repo: https://github.com/hadialqattan/pycln - rev: "v2.2.2" + rev: "v2.4.0" hooks: - id: pycln exclude: ^src/pygama/pargen @@ -85,7 +85,7 @@ repos: stages: [manual] - repo: https://github.com/codespell-project/codespell - rev: "v2.2.5" + rev: "v2.2.6" hooks: - id: codespell @@ -103,7 +103,7 @@ repos: - id: rst-inline-touching-normal - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.3" + rev: "v4.0.0-alpha.8" hooks: - id: prettier types_or: [json] 
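
Hook bumps like the one above are opened automatically by the pre-commit.ci
bot; the same refresh can be reproduced locally with the standard pre-commit
commands (nothing pygama-specific is assumed here):

.. code-block:: console

   $ pre-commit autoupdate       # rewrite the rev: fields in .pre-commit-config.yaml
   $ pre-commit run --all-files  # verify the updated hooks still pass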
From 18182664643a526d940c9ad78f306cde9615dde2 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 1 Jan 2024 18:05:17 +0100 Subject: [PATCH 072/191] pre-commit fixes --- src/pygama/flow/data_loader.py | 2 +- src/pygama/pargen/ecal_th.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index 7490271b0..625b52fbc 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -870,7 +870,7 @@ def build_hit_entries( f"Cannot find {table_name} in file {tier_path}" ) continue - # join eveything in one table + # join everything in one table if tb_table is None: tb_table = tier_tb else: diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index d37b74f77..bd957c3d4 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -212,7 +212,7 @@ def fit_energy_res(self): indexes.append(i) continue elif peak == 511.0: - log.info(f"e annhilation found at index {i}") + log.info(f"e annihilation found at index {i}") indexes.append(i) continue elif np.isnan(dfwhms[i]): @@ -1326,7 +1326,7 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz indexes.append(i) continue elif peak == 511.0: - log.info(f"e annhilation found at index {i}") + log.info(f"e annihilation found at index {i}") indexes.append(i) continue else: From 85900f7c22f347c70589055280dfdc9214848874 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 1 Jan 2024 18:27:14 +0100 Subject: [PATCH 073/191] Update to latest pydataobj alpha release --- setup.cfg | 2 +- src/pygama/evt/build_tcm.py | 6 ++--- src/pygama/flow/data_loader.py | 30 ++++++++++-------------- src/pygama/flow/file_db.py | 14 +++++------ src/pygama/hit/build_hit.py | 4 ++-- src/pygama/pargen/energy_optimisation.py | 16 ++++--------- src/pygama/pargen/extract_tau.py | 10 ++++---- src/pygama/pargen/utils.py | 4 ++-- tests/evt/test_build_tcm.py | 4 ++-- tests/hit/test_build_hit.py | 4 ++-- 10 files changed, 40 insertions(+), 54 deletions(-) diff --git a/setup.cfg b/setup.cfg index 6660e83eb..f59709572 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ install_requires = h5py>=3.2 iminuit legend-daq2lh5>=1.1.0 - legend-pydataobj>=1.3 + legend-pydataobj>=1.5.0a1 matplotlib numba!=0.53.*,!=0.54.*,!=0.57 numpy>=1.21 diff --git a/src/pygama/evt/build_tcm.py b/src/pygama/evt/build_tcm.py index be0f44ba5..e821ee50f 100644 --- a/src/pygama/evt/build_tcm.py +++ b/src/pygama/evt/build_tcm.py @@ -49,7 +49,7 @@ def build_tcm( out_name name for the TCM table in the output file. wo_mode - mode to send to :meth:`~.lgdo.lh5.LH5Store.write_object`. + mode to send to :meth:`~.lgdo.lh5.LH5Store.write`. 
See Also -------- @@ -79,7 +79,7 @@ def build_tcm( else: array_id = len(all_tables) - 1 table = table + "/" + coin_col - coin_data.append(store.read_object(table, filename)[0].nda) + coin_data.append(store.read(table, filename)[0].nda) array_ids.append(array_id) tcm_cols = ptcm.generate_tcm_cols( @@ -94,6 +94,6 @@ def build_tcm( ) if out_file is not None: - store.write_object(tcm, out_name, out_file, wo_mode=wo_mode) + store.write(tcm, out_name, out_file, wo_mode=wo_mode) return tcm diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index 09ea44467..b0df59d8f 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -584,7 +584,7 @@ def build_entry_list( tcm_table_name = self.filedb.get_table_name(tcm_tier, tcm_tb) try: - tcm_lgdo, _ = sto.read_object(tcm_table_name, tcm_path) + tcm_lgdo, _ = sto.read(tcm_table_name, tcm_path) except KeyError: log.warning(f"Cannot find table {tcm_table_name} in file {tcm_path}") continue @@ -649,7 +649,7 @@ def build_entry_list( if tb in col_tiers[file]["tables"][tier]: table_name = self.filedb.get_table_name(tier, tb) try: - tier_table, _ = sto.read_object( + tier_table, _ = sto.read( table_name, tier_path, field_mask=cut_cols[level], @@ -708,11 +708,9 @@ def build_entry_list( f_dict = f_entries.to_dict("list") f_struct = Struct(f_dict) if self.merge_files: - sto.write_object(f_struct, "entries", output_file, wo_mode="a") + sto.write(f_struct, "entries", output_file, wo_mode="a") else: - sto.write_object( - f_struct, f"entries/{file}", output_file, wo_mode="a" - ) + sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a") if log.getEffectiveLevel() >= logging.INFO: progress_bar.close() @@ -862,7 +860,7 @@ def build_hit_entries( # load the data from the tier file, just the columns needed for the cut table_name = self.filedb.get_table_name(tier, tb) try: - tier_tb, _ = sto.read_object( + tier_tb, _ = sto.read( table_name, tier_path, field_mask=cut_cols ) except KeyError: @@ -902,11 +900,9 @@ def build_hit_entries( f_dict = f_entries.to_dict("list") f_struct = Struct(f_dict) if self.merge_files: - sto.write_object(f_struct, "entries", output_file, wo_mode="a") + sto.write(f_struct, "entries", output_file, wo_mode="a") else: - sto.write_object( - f_struct, f"entries/{file}", output_file, wo_mode="a" - ) + sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a") if log.getEffectiveLevel() >= logging.INFO: progress_bar.close() @@ -1117,7 +1113,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table): for file in files ] - tier_table, _ = sto.read_object( + tier_table, _ = sto.read( name=tb_name, lh5_file=tier_paths, idx=idx_mask, @@ -1143,7 +1139,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table): f_table = utils.dict_to_table(col_dict=col_dict, attr_dict=attr_dict) if output_file: - sto.write_object(f_table, "merged_data", output_file, wo_mode="o") + sto.write(f_table, "merged_data", output_file, wo_mode="o") if in_memory: if self.output_format == "lgdo.Table": return f_table @@ -1220,7 +1216,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table): raise FileNotFoundError(tier_path) table_name = self.filedb.get_table_name(tier, tb) - tier_table, _ = sto.read_object( + tier_table, _ = sto.read( table_name, tier_path, idx=idx_mask, @@ -1246,7 +1242,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table): if in_memory: load_out.add_field(name=file, obj=f_table) if output_file: - sto.write_object(f_table, f"{file}", output_file, wo_mode="o") + sto.write(f_table, 
f"{file}", output_file, wo_mode="o") # end file loop if log.getEffectiveLevel() >= logging.INFO: @@ -1318,7 +1314,7 @@ def load_evts( ) if os.path.exists(tier_path): table_name = self.filedb.get_table_name(tier, tb) - tier_table, _ = sto.read_object( + tier_table, _ = sto.read( table_name, tier_path, idx=idx_mask, @@ -1332,7 +1328,7 @@ def load_evts( if in_memory: load_out[file] = f_table if output_file: - sto.write_object(f_table, f"file{file}", output_file, wo_mode="o") + sto.write(f_table, f"file{file}", output_file, wo_mode="o") # end file loop if in_memory: diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index bf05a8f4f..4047f8c97 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -476,7 +476,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series: flattened_data=flattened, cumulative_length=length ) sto = LH5Store() - sto.write_object(columns_vov, "unique_columns", to_file) + sto.write(columns_vov, "unique_columns", to_file) return self.columns @@ -526,7 +526,7 @@ def _replace_idx(row, trans, tier): # loop over the files for p in paths: - cfg, _ = sto.read_object("config", p) + cfg, _ = sto.read("config", p) cfg = json.loads(cfg.value.decode()) # make sure configurations are all the same @@ -538,7 +538,7 @@ def _replace_idx(row, trans, tier): ) # read in unique columns - vov, _ = sto.read_object("columns", p) + vov, _ = sto.read("columns", p) # Convert back from VoV of UTF-8 bytestrings to a list of lists of strings columns = [[v.decode("utf-8") for v in ov] for ov in list(vov)] @@ -597,14 +597,12 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None: filename output LH5 file name. wo_mode - passed to :meth:`~.lgdo.lh5.write_object`. + passed to :meth:`~.lgdo.lh5.write`. """ log.debug(f"writing database to {filename}") sto = LH5Store() - sto.write_object( - Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode - ) + sto.write(Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode) if wo_mode in ["write_safe", "w", "overwrite_file", "of"]: wo_mode = "a" @@ -621,7 +619,7 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None: flattened_data=Array(nda=np.array(flat).astype("S")), cumulative_length=Array(nda=np.array(cum_l)), ) - sto.write_object(col_vov, "columns", filename, wo_mode=wo_mode) + sto.write(col_vov, "columns", filename, wo_mode=wo_mode) # FIXME: to_hdf() throws this: # diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index efbfda29b..3f073380f 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -69,7 +69,7 @@ def build_hit( n_max maximum number of rows to process wo_mode - forwarded to :meth:`~.lgdo.lh5.write_object`. + forwarded to :meth:`~.lgdo.lh5.write`. 
""" store = LH5Store() @@ -168,7 +168,7 @@ def build_hit( if col not in cfg["outputs"]: outtbl_obj.remove_column(col, delete=True) - store.write_object( + store.write( obj=outtbl_obj, name=tbl.replace("/dsp", "/hit"), lh5_file=outfile, diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 1b85c6a0e..1c34901d9 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -68,8 +68,8 @@ def run_optimisation( Number of events to run over """ grid = set_par_space(opt_config) - waveforms = sto.read_object(f"/raw/{wf_field}", file, idx=cuts, n_rows=n_events)[0] - baseline = sto.read_object("/raw/baseline", file, idx=cuts, n_rows=n_events)[0] + waveforms = sto.read(f"/raw/{wf_field}", file, idx=cuts, n_rows=n_events)[0] + baseline = sto.read("/raw/baseline", file, idx=cuts, n_rows=n_events)[0] tb_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline}) return opt.run_grid(tb_data, dsp_config, grid, fom, db_dict, **fom_kwargs) @@ -138,12 +138,8 @@ def form_dict(in_dict, length): fom_kwargs = fom_kwargs["fom_kwargs"] fom_kwargs = form_dict(fom_kwargs, len(grid)) sto = lh5.LH5Store() - waveforms = sto.read_object( - f"{lh5_path}/{wf_field}", file, idx=cuts, n_rows=n_events - )[0] - baseline = sto.read_object(f"{lh5_path}/baseline", file, idx=cuts, n_rows=n_events)[ - 0 - ] + waveforms = sto.read(f"{lh5_path}/{wf_field}", file, idx=cuts, n_rows=n_events)[0] + baseline = sto.read(f"{lh5_path}/baseline", file, idx=cuts, n_rows=n_events)[0] tb_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline}) return opt.run_grid_multiprocess_parallel( tb_data, @@ -999,9 +995,7 @@ def event_selection( idx_list = get_wf_indexes(sort_index, idx_list_lens) idxs = np.array(sorted(np.concatenate(masks))) - input_data = sto.read_object(f"{lh5_path}", raw_files, idx=idxs, n_rows=len(idxs))[ - 0 - ] + input_data = sto.read(f"{lh5_path}", raw_files, idx=idxs, n_rows=len(idxs))[0] if isinstance(dsp_config, str): with open(dsp_config) as r: diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 4b5a48ba2..d35473715 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -61,12 +61,10 @@ def load_data( cuts = np.where((df.daqenergy.values > threshold) & (~ids))[0] - waveforms = sto.read_object( - f"{lh5_path}/{wf_field}", raw_file, idx=cuts, n_rows=n_events - )[0] - baseline = sto.read_object( - f"{lh5_path}/baseline", raw_file, idx=cuts, n_rows=n_events - )[0] + waveforms = sto.read(f"{lh5_path}/{wf_field}", raw_file, idx=cuts, n_rows=n_events)[ + 0 + ] + baseline = sto.read(f"{lh5_path}/baseline", raw_file, idx=cuts, n_rows=n_events)[0] tb_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline}) return tb_data diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 27f4af9ae..e58785e4e 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -69,7 +69,7 @@ def load_data( all_files = [] masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): - table = sto.read_object(lh5_path, tfiles)[0] + table = sto.read(lh5_path, tfiles)[0] if tstamp in cal_dict: file_df = table.eval(cal_dict[tstamp]).get_dataframe() else: @@ -95,7 +95,7 @@ def load_data( keys = [key.split("/")[-1] for key in keys] params = get_params(keys + list(cal_dict.keys()), params) - table = sto.read_object(lh5_path, files)[0] + table = sto.read(lh5_path, files)[0] df = table.eval(cal_dict).get_dataframe() 
for param in params: if param not in df: diff --git a/tests/evt/test_build_tcm.py b/tests/evt/test_build_tcm.py index 505196825..49296e9fe 100644 --- a/tests/evt/test_build_tcm.py +++ b/tests/evt/test_build_tcm.py @@ -15,7 +15,7 @@ def test_generate_tcm_cols(lgnd_test_data): store = LH5Store() coin_data = [] for tbl in tables: - ts, _ = store.read_object(f"{tbl}/raw/timestamp", f_raw) + ts, _ = store.read(f"{tbl}/raw/timestamp", f_raw) coin_data.append(ts) tcm_cols = evt.generate_tcm_cols( @@ -67,6 +67,6 @@ def test_build_tcm(lgnd_test_data, tmptestdir): ) assert os.path.exists(out_file) store = LH5Store() - obj, n_rows = store.read_object("hardware_tcm", out_file) + obj, n_rows = store.read("hardware_tcm", out_file) assert isinstance(obj, lgdo.Struct) assert list(obj.keys()) == ["cumulative_length", "array_id", "array_idx"] diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py index d92d12252..8b8d39090 100644 --- a/tests/hit/test_build_hit.py +++ b/tests/hit/test_build_hit.py @@ -94,7 +94,7 @@ def test_outputs_specification(dsp_test_file, tmptestdir): ) store = LH5Store() - obj, _ = store.read_object("/geds/hit", outfile) + obj, _ = store.read("/geds/hit", outfile) assert list(obj.keys()) == ["calE", "AoE", "A_max"] @@ -109,7 +109,7 @@ def test_aggregation_outputs(dsp_test_file, tmptestdir): ) sto = LH5Store() - obj, _ = sto.read_object("/geds/hit", outfile) + obj, _ = sto.read("/geds/hit", outfile) assert list(obj.keys()) == [ "is_valid_rt", "is_valid_t0", From 0541a6b3647114a9d6be32ac02a51f679ca36f4d Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 2 Jan 2024 17:38:03 +0100 Subject: [PATCH 074/191] Update dspeed test configs and bump requirements in setup.cfg --- setup.cfg | 4 +- tests/configs/icpc-dsp-config.json | 132 +++++++++++++++++---------- tests/configs/sipm-dplms-config.json | 55 ++++++----- tests/configs/sipm-dsp-config.json | 14 +-- 4 files changed, 125 insertions(+), 80 deletions(-) diff --git a/setup.cfg b/setup.cfg index f59709572..067a87fc8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,10 +32,10 @@ project_urls = packages = find: install_requires = colorlog - dspeed>=1.1 + dspeed@git+https://github.com/legend-exp/dspeed@main h5py>=3.2 iminuit - legend-daq2lh5>=1.1.0 + legend-daq2lh5@git+https://github.com/legend-exp/legend-daq2lh5@main legend-pydataobj>=1.5.0a1 matplotlib numba!=0.53.*,!=0.54.*,!=0.57 diff --git a/tests/configs/icpc-dsp-config.json b/tests/configs/icpc-dsp-config.json index 8536a31ec..28af29239 100644 --- a/tests/configs/icpc-dsp-config.json +++ b/tests/configs/icpc-dsp-config.json @@ -38,19 +38,19 @@ "processors": { "tp_min, tp_max, wf_min, wf_max": { "function": "min_max", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["waveform", "tp_min", "tp_max", "wf_min", "wf_max"], "unit": ["ns", "ns", "ADC", "ADC"] }, "wf_blsub": { "function": "bl_subtract", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["waveform", "baseline", "wf_blsub"], "unit": "ADC" }, "bl_mean , bl_std, bl_slope, bl_intercept": { "function": "linear_slope_fit", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": [ "wf_blsub[0:750]", "bl_mean", @@ -62,51 +62,65 @@ }, "wf_pz": { "function": "pole_zero", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_blsub", "db.pz.tau", "wf_pz"], "unit": "ADC", "defaults": { "db.pz.tau": "27460.5" } }, "pz_mean , pz_std, pz_slope, pz_intercept": { "function": "linear_slope_fit", - "module": 
"pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz[1500:]", "pz_mean", "pz_std", "pz_slope", "pz_intercept"], "unit": ["ADC", "ADC", "ADC", "ADC"] }, - "wf_t0_filter": { + "t0_kernel": { "function": "t0_filter", - "module": "pygama.dsp.processors", - "args": ["wf_pz", "wf_t0_filter(len(wf_pz), 'f', grid=wf_pz.grid)"], - "init_args": ["128*ns/wf_pz.period", "2*us/wf_pz.period"], + "module": "dspeed.processors", + "args": [ + "128*ns/wf_pz.period", + "2*us/wf_pz.period", + "t0_kernel(round((128*ns+2*us)/wf_pz.period), 'f')" + ], + "unit": "ADC" + }, + "wf_t0_filter": { + "function": "convolve_wf", + "module": "dspeed.processors", + "args": [ + "wf_pz", + "t0_kernel", + "'s'", + "wf_t0_filter(len(wf_pz), 'f', grid=wf_pz.grid)" + ], "unit": "ADC" }, "wf_atrap": { "function": "asym_trap_filter", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "128*ns", "4", "2*us", "wf_atrap"], "unit": "ADC" }, "conv_tmin ,tp_start, conv_min, conv_max": { "function": "min_max", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_t0_filter", "conv_tmin", "tp_start", "conv_min", "conv_max"], "unit": ["ns", "ns", "ADC", "ADC"] }, "tp_0_atrap": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_atrap", "bl_std", "tp_start", 0, "tp_0_atrap"], "unit": "ns" }, "tp_0_est": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_t0_filter", "bl_std", "tp_start", 0, "tp_0_est(unit=ns)"], "unit": "ns" }, "wf_trap": { "function": "trap_norm", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "db.ttrap.rise", "db.ttrap.flat", "wf_trap"], "unit": "ADC", "defaults": { "db.ttrap.rise": "10*us", "db.ttrap.flat": "3.008*us" } @@ -120,7 +134,7 @@ }, "wf_etrap": { "function": "trap_norm", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "db.etrap.rise", "db.etrap.flat", "wf_etrap"], "unit": "ADC", "defaults": { "db.etrap.rise": "10*us", "db.etrap.flat": "3.008*us" } @@ -134,10 +148,10 @@ }, "trapEftp": { "function": "fixed_time_pickoff", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": [ "wf_etrap", - "tp_0_est+db.etrap.rise+db.etrap.flat*db.etrap.sample", + "round(tp_0_est+db.etrap.rise+db.etrap.flat*db.etrap.sample, wf_etrap.grid)", "'l'", "trapEftp" ], @@ -148,23 +162,33 @@ "db.etrap.sample": "0.8" } }, - "wf_cusp": { + "cusp_kernel": { "function": "cusp_filter", - "module": "pygama.dsp.processors", - "args": ["wf_blsub", "wf_cusp(101, 'f')"], - "init_args": [ - "len(wf_blsub)-100", + "module": "dspeed.processors", + "args": [ "db.cusp.sigma/wf_blsub.period", "round(db.cusp.flat/wf_blsub.period)", - "db.pz.tau" + "db.pz.tau/wf_blsub.period", + "cusp_kernel(round(len(wf_blsub)-(33.6*us/wf_blsub.period)-(4.8*us/wf_blsub.period)), 'f')" ], "defaults": { "db.cusp.sigma": "20*us", "db.cusp.flat": "3*us", - "db.pz.tau": "27460.5" + "db.pz.tau": "450*us" }, "unit": "ADC" }, + "wf_cusp": { + "function": "fft_convolve_wf", + "module": "dspeed.processors", + "args": [ + "wf_blsub[:round(len(wf_blsub)-(33.6*us/wf_blsub.period))]", + "cusp_kernel", + "'v'", + "wf_cusp(round((4.8*us/wf_blsub.period)+1), 'f')" + ], + "unit": "ADC" + }, "cuspEmax": { "function": "amax", "module": "numpy", @@ -174,28 +198,38 @@ }, "cuspEftp": { "function": "fixed_time_pickoff", - "module": "pygama.dsp.processors", + "module": 
"dspeed.processors", "args": ["wf_cusp", "db.cusp.sample", "'i'", "cuspEftp"], "unit": "ADC", "defaults": { "db.cusp.sample": "50" } }, - "wf_zac": { + "zac_kernel": { "function": "zac_filter", - "module": "pygama.dsp.processors", - "args": ["wf_blsub", "wf_zac(101, 'f')"], - "init_args": [ - "len(wf_blsub)-100", + "module": "dspeed.processors", + "args": [ "db.zac.sigma/wf_blsub.period", "round(db.zac.flat/wf_blsub.period)", - "db.pz.tau" + "db.pz.tau/wf_blsub.period", + "zac_kernel(round(len(wf_blsub)-(33.6*us/wf_blsub.period)-(4.8*us/wf_blsub.period)), 'f')" ], "defaults": { "db.zac.sigma": "20*us", "db.zac.flat": "3*us", - "db.pz.tau": "27460.5" + "db.pz.tau": "450*us" }, "unit": "ADC" }, + "wf_zac": { + "function": "fft_convolve_wf", + "module": "dspeed.processors", + "args": [ + "wf_blsub[:round(len(wf_blsub)-(33.6*us/wf_blsub.period))]", + "zac_kernel", + "'v'", + "wf_zac(round((4.8*us/wf_blsub.period)+1), 'f')" + ], + "unit": "ADC" + }, "zacEmax": { "function": "amax", "module": "numpy", @@ -205,74 +239,74 @@ }, "zacEftp": { "function": "fixed_time_pickoff", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_zac", "db.zac.sample", "'i'", "zacEftp"], "defaults": { "db.zac.sample": "50" }, "unit": "ADC" }, "tp_100": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax", "tp_0_est", 1, "tp_100"], "unit": "ns" }, "tp_99": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "0.99*trapTmax", "tp_0_est", 1, "tp_99"], "unit": "ns" }, "tp_95": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.95", "tp_99", 0, "tp_95"], "unit": "ns" }, "tp_90": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.9", "tp_95", 0, "tp_90"], "unit": "ns" }, "tp_80": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.8", "tp_90", 0, "tp_80"], "unit": "ns" }, "tp_50": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.5", "tp_80", 0, "tp_50"], "unit": "ns" }, "tp_20": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.2", "tp_50", 0, "tp_20"], "unit": "ns" }, "tp_10": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.1", "tp_20", 0, "tp_10"], "unit": "ns" }, "tp_01": { "function": "time_point_thresh", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "trapTmax*0.01", "tp_10", 0, "tp_01"], "unit": "ns" }, "wf_trap2": { "function": "trap_norm", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "4*us", "96*ns", "wf_trap2"], "unit": "ADC" }, "trapQftp": { "function": "fixed_time_pickoff", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_trap2", "tp_0_est + 8.096*us", "'l'", "trapQftp"], "unit": "ADC" }, @@ -290,31 +324,31 @@ }, "wf_le": { "function": "windower", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_pz", "tp_0_est", "wf_le(301, 'f')"], "unit": "ADC" }, "curr": { "function": "avg_current", - 
"module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["wf_le", 1, "curr(len(wf_le)-1, 'f')"], "unit": "ADC/sample" }, "curr_up": { "function": "upsampler", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["curr", "16", "curr_up(4784, 'f')"], "unit": "ADC/sample" }, "curr_av": { "function": "moving_window_multi", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["curr_up", "48", 3, 0, "curr_av"], "unit": "ADC/sample" }, "aoe_t_min, tp_aoe_max, A_min, A_max": { "function": "min_max", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["curr_av", "aoe_t_min", "tp_aoe_max", "A_min", "A_max"], "unit": ["ns", "ns", "ADC/sample", "ADC/sample"] }, diff --git a/tests/configs/sipm-dplms-config.json b/tests/configs/sipm-dplms-config.json index cc7919e33..dd69bac0f 100644 --- a/tests/configs/sipm-dplms-config.json +++ b/tests/configs/sipm-dplms-config.json @@ -10,26 +10,26 @@ "processors": { "wf_gaus": { "function": "gaussian_filter1d", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.gaussian_filter1d", "args": ["waveform", "wf_gaus(len(waveform))"], "init_args": ["1", "4.0"], "unit": "ADC" }, "curr": { "function": "avg_current", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.moving_windows", "args": ["wf_gaus", 5, "curr(len(wf_gaus)-5)"], "unit": "ADC" }, "hist_weights , hist_borders": { "function": "histogram", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.histogram", "args": ["curr", "hist_weights(100)", "hist_borders(101)"], "unit": ["none", "ADC"] }, "fwhm, idx_out_c, max_out": { "function": "histogram_stats", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.histogram", "args": [ "hist_weights", "hist_borders", @@ -42,7 +42,7 @@ }, "vt_max_candidate_out, vt_min_out, n_max_out, n_min_out": { "function": "get_multi_local_extrema", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.get_multi_local_extrema", "args": [ "curr", 5, @@ -59,7 +59,7 @@ }, "trigger_pos, no_out": { "function": "peak_snr_threshold", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.peak_snr_threshold", "args": [ "curr", "vt_max_candidate_out", @@ -72,13 +72,13 @@ }, "energies": { "function": "multi_a_filter", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.multi_a_filter", "args": ["curr", "trigger_pos", "energies"], "unit": ["ADC"] }, "bl_mean , bl_std, bl_slope, bl_intercept": { "function": "linear_slope_fit", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": [ "waveform[:50]", "bl_mean", @@ -90,34 +90,45 @@ }, "wf_diff": { "function": "avg_current", - "module": "pygama.dsp.processors", + "module": "dspeed.processors", "args": ["waveform", 1, "wf_diff(len(waveform)-1)"], "unit": "ADC" }, - "wf_dplms": { + "dplms_kernel": { "function": "dplms_filter", - "module": "pygama.dsp.processors", - "args": ["wf_diff", "wf_dplms(len(wf_diff)-49, 'f')"], - "unit": "ADC", - "init_args": [ + "module": "dspeed.processors", + "args": [ "db.dplms.noise_matrix", "db.dplms.reference", - "50", "0.01", "1", "0", - "0" - ] + "0", + "dplms_kernel(50, 'f')" + ], + "unit": "ADC" + }, + "wf_dplms": { + "description": "convolve optimised cusp filter", + "function": "convolve_wf", + "module": "dspeed.processors", + "args": [ + "wf_diff", + "dplms_kernel", + "'s'", + "wf_dplms(len(wf_diff)-49, 'f')" + ], + "unit": "ADC" }, "h_weights , h_borders": { "function": "histogram", - 
"module": "pygama.dsp.processors", + "module": "dspeed.processors.histogram", "args": ["wf_dplms", "h_weights(100)", "h_borders(101)"], "unit": ["none", "ADC"] }, "fwhm_d, idx_out_d, max_out_d": { "function": "histogram_stats", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.histogram", "args": [ "h_weights", "h_borders", @@ -130,7 +141,7 @@ }, "vt_max_candidate_out_d, vt_min_out_d, n_max_out_d, n_min_out_d": { "function": "get_multi_local_extrema", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.get_multi_local_extrema", "args": [ "wf_dplms", 10, @@ -145,7 +156,7 @@ }, "trigger_pos_dplms, no_out_d": { "function": "peak_snr_threshold", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.peak_snr_threshold", "args": [ "wf_dplms", "vt_max_candidate_out_d", @@ -158,7 +169,7 @@ }, "energies_dplms": { "function": "multi_a_filter", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.multi_a_filter", "args": ["wf_dplms", "trigger_pos_dplms", "energies_dplms"], "unit": ["ADC"] } diff --git a/tests/configs/sipm-dsp-config.json b/tests/configs/sipm-dsp-config.json index 8de5bd2e6..bb7878a5d 100644 --- a/tests/configs/sipm-dsp-config.json +++ b/tests/configs/sipm-dsp-config.json @@ -3,26 +3,26 @@ "processors": { "wf_gaus": { "function": "gaussian_filter1d", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.gaussian_filter1d", "args": ["waveform", "wf_gaus(len(waveform))"], "init_args": ["1", "4.0"], "unit": "ADC" }, "curr": { "function": "avg_current", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.moving_windows", "args": ["wf_gaus", 5, "curr(len(wf_gaus)-5)"], "unit": "ADC" }, "hist_weights , hist_borders": { "function": "histogram", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.histogram", "args": ["curr", "hist_weights(100)", "hist_borders(101)"], "unit": ["none", "ADC"] }, "fwhm, idx_out_c, max_out": { "function": "histogram_stats", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.histogram", "args": [ "hist_weights", "hist_borders", @@ -35,7 +35,7 @@ }, "vt_max_candidate_out, vt_min_out, n_max_out, n_min_out": { "function": "get_multi_local_extrema", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.get_multi_local_extrema", "args": [ "curr", 5, @@ -52,7 +52,7 @@ }, "trigger_pos, no_out": { "function": "peak_snr_threshold", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.peak_snr_threshold", "args": [ "curr", "vt_max_candidate_out", @@ -65,7 +65,7 @@ }, "energies": { "function": "multi_a_filter", - "module": "pygama.dsp.processors", + "module": "dspeed.processors.multi_a_filter", "args": ["curr", "trigger_pos", "energies"], "unit": ["ADC"] } From 646cf1a883994e2402670b3c89c3d923940fa0f2 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 2 Jan 2024 17:46:16 +0100 Subject: [PATCH 075/191] [ci] fix pytest call in main GH workflow --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4af28a993..95dba9133 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -35,7 +35,7 @@ jobs: python -m pip install --upgrade .[test] - name: Run unit tests run: | - pytest + python -m pytest test-coverage: name: Calculate and upload test coverage From a1a48fd292941f04d4fd00cf881c2f41b7f931ba Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 10 Jan 2024 17:51:55 +0100 Subject: [PATCH 076/191] 
[docs] add Intersphinx mappings for LEGEND packages --- docs/source/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index a02d2d512..d267228d3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -60,6 +60,9 @@ "iminuit": ("https://iminuit.readthedocs.io/en/stable", None), "h5py": ("https://docs.h5py.org/en/stable", None), "pint": ("https://pint.readthedocs.io/en/stable", None), + "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None), + "dspeed": ("https://dspeed.readthedocs.io/en/stable", None), + "daq2lh5": ("https://legend-daq2lh5.readthedocs.io/en/stable", None), } suppress_warnings = [ From 9d1dd2cc6dd21e071fd65b559302a8c2b410d41e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 10 Jan 2024 17:57:43 +0100 Subject: [PATCH 077/191] [hit] Update build_hit() to support latest Table.eval() --- setup.cfg | 4 +-- src/pygama/hit/build_hit.py | 34 +++++++++++++++++++------ tests/hit/configs/basic-hit-config.json | 3 +++ tests/hit/test_build_hit.py | 6 ++++- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/setup.cfg b/setup.cfg index 067a87fc8..9f07a5bd4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,11 +32,11 @@ project_urls = packages = find: install_requires = colorlog - dspeed@git+https://github.com/legend-exp/dspeed@main + dspeed>=1.3.0a4 h5py>=3.2 iminuit legend-daq2lh5@git+https://github.com/legend-exp/legend-daq2lh5@main - legend-pydataobj>=1.5.0a1 + legend-pydataobj@git+https://github.com/legend-exp/legend-pydataobj@table-eval matplotlib numba!=0.53.*,!=0.54.*,!=0.57 numpy>=1.21 diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index 3f073380f..b6033e305 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -8,8 +8,9 @@ import os from collections import OrderedDict +import lgdo import numpy as np -from lgdo import Array, LH5Iterator, LH5Store, ls +from lgdo import LH5Iterator, LH5Store, ls log = logging.getLogger(__name__) @@ -25,10 +26,12 @@ def build_hit( buffer_len: int = 3200, ) -> None: """ - Transform a :class:`~.lgdo.Table` into a new :class:`~.lgdo.Table` by - evaluating strings describing column operations. + Transform a :class:`~lgdo.types.table.Table` into a new + :class:`~lgdo.types.table.Table` by evaluating strings describing column + operations. - Operates on columns only, not specific rows or elements. + Operates on columns only, not specific rows or elements. Relies on + :meth:`~lgdo.types.table.Table.eval`. Parameters ---------- @@ -47,7 +50,7 @@ def build_hit( "outputs": ["calE", "AoE"], "operations": { "calE": { - "expression": "sqrt(@a + @b * trapEmax**2)", + "expression": "sqrt(a + b * trapEmax**2)", "parameters": {"a": "1.23", "b": "42.69"}, }, "AoE": {"expression": "A_max/calE"}, @@ -69,7 +72,11 @@ def build_hit( n_max maximum number of rows to process wo_mode - forwarded to :meth:`~.lgdo.lh5.write`. + forwarded to :meth:`lgdo.lh5.store.LH5Store.write`. + + See Also + -------- + lgdo.types.table.Table.eval """ store = LH5Store() @@ -129,7 +136,18 @@ def build_hit( for tbl_obj, start_row, n_rows in lh5_it: n_rows = min(tot_n_rows - start_row, n_rows) - outtbl_obj = tbl_obj.eval(cfg["operations"]) + # create a new table object that links all the columns in the + # current table (i.e. 
no copy)
+            outtbl_obj = lgdo.Table(col_dict=tbl_obj)
+
+            for outname, info in cfg["operations"].items():
+                outcol = outtbl_obj.eval(
+                    info["expression"], info.get("parameters", None)
+                )
+                if "lgdo_attrs" in info:
+                    outcol.attrs |= info["lgdo_attrs"]
+
+                outtbl_obj.add_column(outname, outcol)

             # make high level flags
             if "aggregations" in cfg:
@@ -151,7 +169,7 @@
                     multiplier = 2 ** np.arange(n_flags, dtype=flag_values.dtype)
                     flag_out = np.dot(flag_values, multiplier)

-                    outtbl_obj.add_field(high_lvl_flag, Array(flag_out))
+                    outtbl_obj.add_field(high_lvl_flag, lgdo.Array(flag_out))

         # remove or add columns according to "outputs" in the configuration
         # dictionary
diff --git a/tests/hit/configs/basic-hit-config.json b/tests/hit/configs/basic-hit-config.json
index 0cf98137e..69c44d985 100644
--- a/tests/hit/configs/basic-hit-config.json
+++ b/tests/hit/configs/basic-hit-config.json
@@ -9,6 +9,9 @@
       "parameters": {
         "a": 1.23,
         "b": 42.69
+      },
+      "lgdo_attrs": {
+        "units": "keV"
       }
     },
     "AoE": {
diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py
index 8b8d39090..be5e41689 100644
--- a/tests/hit/test_build_hit.py
+++ b/tests/hit/test_build_hit.py
@@ -24,6 +24,10 @@ def test_basics(dsp_test_file, tmptestdir):
     assert os.path.exists(outfile)
     assert ls(outfile, "/geds/") == ["geds/hit"]

+    store = LH5Store()
+    tbl, _ = store.read("geds/hit", outfile)
+    assert tbl.calE.attrs == {"datatype": "array<1>{real}", "units": "keV"}
+

 def test_illegal_arguments(dsp_test_file):
     with pytest.raises(ValueError):
@@ -95,7 +99,7 @@ def test_outputs_specification(dsp_test_file, tmptestdir):

     store = LH5Store()
     obj, _ = store.read("/geds/hit", outfile)
-    assert list(obj.keys()) == ["calE", "AoE", "A_max"]
+    assert sorted(obj.keys()) == ["A_max", "AoE", "calE"]

From 33a7c9b634460b8b492ffa23aae4cf5fa2009858 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Thu, 11 Jan 2024 11:04:54 +0100
Subject: [PATCH 078/191] [hit] test HDF5 LGDO settings too

---
 tests/hit/configs/basic-hit-config.json | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/hit/configs/basic-hit-config.json b/tests/hit/configs/basic-hit-config.json
index 69c44d985..d02d80797 100644
--- a/tests/hit/configs/basic-hit-config.json
+++ b/tests/hit/configs/basic-hit-config.json
@@ -11,7 +11,11 @@
         "b": 42.69
       },
       "lgdo_attrs": {
-        "units": "keV"
+        "units": "keV",
+        "hdf5_settings": {
+          "compression": "gzip",
+          "shuffle": true
+        }
       }
     },
     "AoE": {
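
The two hit-config patches above change how an "operations" block is consumed:
build_hit() now evaluates one expression at a time through Table.eval() and
attaches any "lgdo_attrs" to the resulting column. A minimal standalone sketch
of that loop follows, assuming an in-memory table with illustrative
trapEmax/A_max values rather than data from a real DSP file:

    import lgdo
    import numpy as np

    # hypothetical input columns, standing in for a dsp-tier table
    tbl = lgdo.Table(
        col_dict={
            "trapEmax": lgdo.Array(np.array([100.0, 200.0, 300.0])),
            "A_max": lgdo.Array(np.array([10.0, 20.0, 30.0])),
        }
    )

    operations = {
        "calE": {
            "expression": "sqrt(a + b * trapEmax**2)",
            "parameters": {"a": 1.23, "b": 42.69},
            "lgdo_attrs": {"units": "keV"},
        },
        # may refer to outputs defined by earlier operations
        "AoE": {"expression": "A_max/calE"},
    }

    # same pattern as the build_hit() loop: link the input columns (no copy),
    # then add one evaluated column per operation
    outtbl = lgdo.Table(col_dict=tbl)
    for outname, info in operations.items():
        outcol = outtbl.eval(info["expression"], info.get("parameters", None))
        if "lgdo_attrs" in info:
            outcol.attrs |= info["lgdo_attrs"]
        outtbl.add_column(outname, outcol)

    print(outtbl.calE.attrs)  # includes "units": "keV"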
From cc3b426a8bf02f517bd09ef80d72074a3b8d6352 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Thu, 11 Jan 2024 11:40:15 +0100
Subject: [PATCH 079/191] [setup] update LEGEND packages version bounds to pre-release versions

---
 setup.cfg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 9f07a5bd4..6582215a1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,8 +35,8 @@ install_requires =
     dspeed>=1.3.0a4
     h5py>=3.2
     iminuit
-    legend-daq2lh5@git+https://github.com/legend-exp/legend-daq2lh5@main
-    legend-pydataobj@git+https://github.com/legend-exp/legend-pydataobj@table-eval
+    legend-daq2lh5>=1.2.0a1
+    legend-pydataobj>=1.5.0a2
     matplotlib
     numba!=0.53.*,!=0.54.*,!=0.57
     numpy>=1.21

From f249cdb89e135a42ec214d4b778c950440ee87b5 Mon Sep 17 00:00:00 2001
From: valerioda
Date: Fri, 12 Jan 2024 10:37:51 +0100
Subject: [PATCH 080/191] update the LH5 file writes/reads to match the new LH5Store syntax, according to suggestions

---
 src/pygama/pargen/dplms_ge_dict.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py
index 67caf4ced..0c1f9fcbc 100644
--- a/src/pygama/pargen/dplms_ge_dict.py
+++ b/src/pygama/pargen/dplms_ge_dict.py
@@ -13,11 +13,9 @@
 import time
 from collections import OrderedDict

-import lgdo
-import lgdo.lh5_store as lh5
 import matplotlib.pyplot as plt
 import numpy as np
-from lgdo import Array
+from lgdo import Array, Table, lh5
 from scipy.signal import convolve, convolve2d

 from pygama.math.histogram import get_hist
@@ -42,8 +40,8 @@
 def dplms_ge_dict(
     lh5_path: str,
-    raw_fft: lgdo.Table,
-    raw_cal: lgdo.Table,
+    raw_fft: Table,
+    raw_cal: Table,
     dsp_config: dict,
     par_dsp: dict,
     par_dsp_lh5: str,
@@ -57,19 +55,19 @@

     Parameters
     ----------
-    lh5_path: str
+    lh5_path
         Name of channel to process, should be name of lh5 group in raw files
-    fft_files : lgdo.Table
+    fft_files
         table with fft data
-    raw_cal : lgdo.Table
+    raw_cal
         table with cal data
-    dsp_config: dict
+    dsp_config
         dsp config file
-    par_dsp: dict
+    par_dsp
         Dictionary with db parameters for dsp processing
-    par_dsp_lh5: str
+    par_dsp_lh5
         Path for saving dplms coefficients
-    dplms_dict: dict
+    dplms_dict
         Dictionary with various parameters

     Returns
@@ -275,7 +273,7 @@
         wsize,
     )

-    sto.write_object(
+    sto.write(
         Array(x),
         name="dplms",
         lh5_file=par_dsp_lh5,
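
The dplms change above is one instance of a rename that recurs throughout this
series: LH5Store.write_object()/read_object() are superseded by write()/read()
in the new lgdo.lh5 API. A short sketch of the round trip, with illustrative
file and object names not taken from the patch:

    import numpy as np
    from lgdo import Array
    from lgdo.lh5 import LH5Store

    sto = LH5Store()

    # write an Array of (placeholder) filter coefficients
    sto.write(
        Array(np.zeros(100)),
        name="dplms",
        lh5_file="par_dsp.lh5",
        wo_mode="overwrite",
    )

    # read() returns the LGDO object plus the number of rows read
    coeffs, n_rows = sto.read("dplms", "par_dsp.lh5")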
dependency") + for cfg in lh5_tables_config.values(): + cfg["operations"] = _reorder_table_operations(cfg["operations"]) + first_done = False for tbl, cfg in lh5_tables_config.items(): lh5_it = LH5Iterator(infile, tbl, buffer_len=buffer_len) @@ -177,7 +184,7 @@ def build_hit( if isinstance(cfg["outputs"], list): # add missing columns (forwarding) for out in cfg["outputs"]: - if out not in outtbl_obj.keys(): + if out not in outtbl_obj: outtbl_obj.add_column(out, tbl_obj[out]) # remove non-required columns @@ -196,3 +203,59 @@ def build_hit( ) first_done = True + + +def _reorder_table_operations( + config: Mapping[str, Mapping] +) -> OrderedDict[str, Mapping]: + """Reorder operations in `config` according to mutual dependency.""" + + def _one_pass(config): + """Loop once over `config` and do a first round of reordering""" + # list to hold reordered config keys + ordered_keys = [] + + # start looping over config + for outname in config: + # initialization + if not ordered_keys: + ordered_keys.append(outname) + continue + + if outname in ordered_keys: + raise RuntimeError(f"duplicated operation '{outname}' detected") + + # loop over existing reordered keys and figure out where to place + # the new key + idx = 0 + for k in ordered_keys: + # get valid names in the expression + c = compile( + config[k]["expression"], "gcc -O3 -ffast-math build_hit.py", "eval" + ) + + # if we need "outname" for this expression, insert it before! + if outname in c.co_names: + break + else: + idx += 1 + + ordered_keys.insert(idx, outname) + + # now replay the config dictionary based on sorted keys + opdict = OrderedDict() + for k in ordered_keys: + opdict[k] = config[k] + + return opdict + + # okay, now we need to repeat this until we've sorted everything + current = OrderedDict(config) + + while True: + new = _one_pass(current) + + if new == current: + return new + else: + current = new diff --git a/tests/hit/configs/basic-hit-config.json b/tests/hit/configs/basic-hit-config.json index d02d80797..1946f162c 100644 --- a/tests/hit/configs/basic-hit-config.json +++ b/tests/hit/configs/basic-hit-config.json @@ -1,8 +1,8 @@ { "outputs": ["calE", "AoE", "A_max"], "operations": { - "twice_trap_e_max": { - "expression": "2 * trapEmax" + "AoE": { + "expression": "A_max/calE" }, "calE": { "expression": "sqrt(a + b * twice_trap_e_max**2)", @@ -18,8 +18,8 @@ } } }, - "AoE": { - "expression": "A_max/calE" + "twice_trap_e_max": { + "expression": "2 * trapEmax" } } } diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py index be5e41689..5387b289f 100644 --- a/tests/hit/test_build_hit.py +++ b/tests/hit/test_build_hit.py @@ -7,10 +7,28 @@ from lgdo import LH5Store, ls from pygama.hit import build_hit +from pygama.hit.build_hit import _reorder_table_operations config_dir = Path(__file__).parent / "configs" +def test_ops_reorder(): + assert list(_reorder_table_operations({}).keys()) == [] + + ops = { + "out1": {"expression": "out2 + out3 * outy"}, + "out2": {"expression": "log(out4)"}, + "out3": {"expression": "outx + 2"}, + "out4": {"expression": "outz + out3"}, + } + assert list(_reorder_table_operations(ops).keys()) == [ + "out3", + "out4", + "out2", + "out1", + ] + + def test_basics(dsp_test_file, tmptestdir): outfile = f"{tmptestdir}/LDQTA_r117_20200110T105115Z_cal_geds_hit.lh5" From 8277439dff6b7ea39abb32b81491a635ac850289 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 12 Jan 2024 17:43:29 +0100 Subject: [PATCH 082/191] Do not ignore DeprecationWarnings in pytest! 
From 8277439dff6b7ea39abb32b81491a635ac850289 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 12 Jan 2024 17:43:29 +0100
Subject: [PATCH 082/191] Do not ignore DeprecationWarnings in pytest!

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8f5058ee8..4d08123b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ write_to = "src/pygama/_version.py"
 minversion = "6.0"
 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
 xfail_strict = true
-filterwarnings = ["error", "ignore::DeprecationWarning"]
+filterwarnings = ["error"]
 log_cli_level = "info"
 testpaths = "tests"

From ff123c02caa8819a4aaeb1a78d5492caf4de0aa4 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 12 Jan 2024 18:25:14 +0100
Subject: [PATCH 083/191] Add pygama logo to documentation and README (#547)

* Add pygama logo to Sphinx docs and to README.md
---
 .github/logo.png    | Bin 0 -> 124659 bytes
 README.md           |   2 ++
 docs/source/conf.py |   1 +
 3 files changed, 3 insertions(+)
 create mode 100644 .github/logo.png

diff --git a/.github/logo.png b/.github/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..36aec659ada7b8a52bbc7d2b8939a17be91c0bae
GIT binary patch
literal 124659
[124659 bytes of base85-encoded PNG data omitted]
zKuklN$uocLqzN&A>I7&Ab2iVJs4>%oM~B21eV}JE!GV6u_RZPL5=MaQp{KCrPRX1Rx`Nmpg25c}Rd4wT0XeA zPTPP758uvL2O(n40+<4>K5_)EGDqVbX*cDoYtWNIkY&MdE+BEBHM!N|A=|gv89Hcx z+a}_7Io|td5C11`HFjn#J%9+cdO~k-EnRe~+6)joX`dLsR{H5s{&c&A`@6eqo`(PM zHi8~v5V2a3)lBtQ--ABQks9pf>_E$2ni%_+1O?|ayUD6*g&_L(B7FAICo4{L^OIPp zb+^oLm_>6YnyzG9EXQKrj^Su~?Ck}-&CIQ!31R!m_0KUMoLs3wCONZ^6i>i6#Tjca zPqYQ-f;|1T0Cpy~j0zxD^zdjf4)$I%{JFcZUZ#>ulu&=!1W>(?G3u$t#2T^*VYg8Z zPh%ZzvUCv(PP6}bpe>90ws0e$bF2a|f4S#-mdL*(;n!At0nl%m=|cZ*;mT6JMtbVJ z2w-V;ZzpmH7l}FTNya^KBVCW!3I|&IK$+U~nn~!rIjhFT)kRlC3z2% zue&TL(z3ZN&QJdP#C}YJmuB&l;sv>erl!1oDx|yQr?HV4)9Asp6vMhVIdnTf@25D|r7|2Tdd5abc;4z|wE-SO{FV^d6UqP74GfNke-ahNm|K?Sg z0d5DHNs+a2Ak^oboSLzz@q?M1JPr<_*Hcb`t+%hx)PLV$49gXz0x3Cadl@}?k7Y(I z!SUhSVU`ZNOJw&5QBmk*w{!u}s{9o)KPGb7d=%zk?*|fHIi4@|Y6B8o(c4MFVp;?b zl}({yjq~#o_(^`3B2Y@qk;A7MeK5;!IiCz7F-9U{@#1q5wIMP{9L&H6*hVp&&LA&K<#}3r&f*Nih{$lNh;#Lsy@O49LFCYYdMY~En^(IjRCXa&mgp#9u z&+Keil%vsDV1AU`uoZNa04-&#Z5_AT4!*Cn5W+P@YxeJfA2PL09&WCO zwiXn;p#p$Gq9DZjy3MlNwy>HFy}^1i1#{%7wMTVzUhhX`!-{F*Ub-M~l=_#TvZw{G(K$umMiyoRaZY z1NaR!pI8Oa!Gy6;mk2lop*E~vyGN^EEqrp1Hm?=p0RI7n;>5`kf>X*IU0>GvZH{*$ zYF=%sxO3vy16a~LaOzoK* z`O4MM)%+;xRQ7!@Gyb;6YP~ficFpG^*OqO{a9C+Yb*&doGXG`YH7KY8${m7{;(hM0{onSZh@$`n8x z3zo8TGc+r*Wn7}{|AyrNC5s*{_l6qGmwQhcE-np`G2%;u<>?! z9)c4Ugh;s9{t>){VBqsE;IVnst8Q<3?X`ywz2_0uv7HF@DAQ38og&I4zdnKelYdb& zdW*Jkb{!S$unH#GUe*|+UT-xSDr%t;9|>~@n57y4V|2UkMlL)O>Ine{z^ys}kOE4| z990>ybs_Xf7F@uWbHwNqDntu<9041+zl;|=zJot!1u?cBH&-OiwcYw< zIv#tp#uX@m+7q5z0G^=NU_Nr$GyeVTkCG!x-9G!kC7F+Gf~lcq${jz`xcCACvsU=0 z-Z9~7YT>Rhq^re1G=U_QyC1HUsC{8k!68yz+YBl<7|TV^G_5F5;dg@1ixq%!D_N0q zIa;l5gt8XB_At)ogkCmgHTN>$vJZae`9VdUt%iwPN94@>V8Q>l@Xk{t$T7vXJ$S&C zyq-g`eAtQgDpFTfPHEJ4-2~QMUb3Z>mFtJaB%oppz9F%CV-y{yRFBI6e6(1*{k*VTPaDULW3$4*SxQ+i%-P-9*5@3joCxKnG$=p=3kw zq8TQxCcwZ4&IG97)t2eQ<@`xIO0`J%`Rmtkjs_6olNV}t?yq!wb4_oG@oVIK^O(3k zblW=Q4y_*-YoDA!48U%QQflQE&URe349##QYJ6|Bs|Rc{eC-;OIl9FFww9?Ju6wV$ z+NRPBlR16>KW-GS(2xLK^r6Y(hk&!`XovjFQ?tF4kOGi_vIZztXKGD|Woo6_C)px! 
zKWF%8cy0NydI~Vgay0sko((0hXkw}!xcLl1o0{#_sH9hi^nTp`MyFz8kFv=Z zDKCK!xBS3RcI0#=Wa9BG(Dj(_RaiN@jt5h>HD&-ffpP#6Rs8nJ)z2x|nFCUCXt`50 z@(bO?6J#Q?(I>t!!0i&EN0YDT)qEClb=`@0N2HZl+#K`I!BZ&6@w;!=-+l@BkWE1P zIs{4xg+a-DQU8_bTUHIgYvfkfND@lG{2UGtv}M;+59gRk0S>i3d3G+TZ$vr?Sz) zw}$Fl0E|IWEephNf%NhFc$=aUt>C;;{LoE#;U3O#r7`B67=;+YM;7%hmZwla z%KA%(*QEkuEmy4^{`N$~kxrVm=`|aNnLkBow0cbW!{T=o9{om5v2kLB5e3pJrgInr zarbH@0aS}WFF-Y?;%4Z=+Ps~CZ3*Ov3b~9DC)X5}j~_6k#58EIBZF~=#;%rN%8@L| zgPJtOjuvOeVW8ku_p0wHdud(hekxTx7Hj_9PNG#MBnfh*ug?`H2r#q70t5{Q2pUgg zq!hvA+2;3YYqAw)Mv|Rw(@Dy9W*Q-!DiPA%drZM82M2>HQghb+<&v)#N0sJ?Zp;Y` z8SXCdAQ_<=xB-iXu8+9%UcCN@67%%`BNpNWm^DVGHe;C|SV8t7Qm(}H`RFuZS6~I@ z(C{okK3W6Q6%R8@yaaO!%m|<_qu&m+H$!bVi(}wK1Tf2;sp>1nn#X1N zHjaB9aYT46xJUyJ>bcEQlZB_M6vrmoE}@7e?o>(Q5g)|p zJ)3X)GOy4$g||^Q;G|%`5Iot_Q$x(I^qaG&oBxnD8N&3 z^)W{&lCE6wEKGMH0m(W8H7p^uF7_^L5B;}93LSAv&Gmi*$Hryh$G-!{SP8m3A3XLc zew%4RFmXVO3;!`s07|&_aiR|m=g}YEM!9wjg3q18LkckMihnm&fG*q#h+0{u7Ud)U z@YULV8l{RGegysd$(a~!u!y02`KP`JN1|<%{3#{JwB=?b^)dUPj3LcuUcXiHe`kh0 zC~tY-2n_O)HL=*-5Z{T>G0=b7r}4vmBP=0^!~u{s9u7Qa)`3U_%VE$dguM+}+x1#g z1uaNyIz^!(JnjP3+3RRplJ_H9(>)Ruj|db)Ld8JUVSB*Zd01l|8^CShp#b%Z1O>Sa z6EQ9izv&&m0@_VQR_N+_+;3v|u>eCSDyYaAw#@-0dGtAX96^%8o)BpeW< z(Ip*E`GeiZMolCeFD|T4fAEu$fz}*=%dP~#DTgru=V^iU4)UU`Xt@Wn2MST7jc2;| z$A7wka@GgELwrmI^J@pZ5ak+n-J9Jbpct(};8dXk5h`#p@*lmr7FOh4cJV3GG`>7H z;1~M0BmNvui9{fOg8AiZj)MH@(+~7_`Vtb;zkv4<55&ialJZvuFa95U_y7z0nZEIS z;y`B_79aJkX!C)h%Dw(on~M|iJ%x?PW8bxF7D3La(%eWW45&j|nqNzt8+aG9je_qC zhZ2Fw>WUYajZkoq2^l0{M2*f@Wn6F^U0nXSrN_sGvo2#FI$rOc}E+U|S9`OMc_BpfJH)O_%YBB}7m&S!o z$Kta}4Gry&5NZbFz0RY-?H3T!@V9-9 z?cOdSkC$LROwBW>5Wd#0#QXc-9+sK$HwP)kSD_^5^u3OS&#vU-_|)2K+fzHM7R&3*V+)#{Pc9J*=|Q0Zk0HL;{8f zz=IMJ6aoXQpMo2)tVp@)NifHGEnu%tA4O8k2qhBdoK`gt2OIosl@i~9Dd9COl5HwJ zcO0FZ)28bAJViL0*Xbd3dVk3&)vk^kNTh(xE=3mi$MNrg(kwPG8jZtNKM;Hq!5546 z`m|xQxaB8antI!kIZ9FxVZe4M8eltI$9QThe}pdH!60%VNx`#u*=`;J*+Dt zI)z>H=QC>1Qe1$`u{YxvU=JiFEK2d8@!$$_DEB$mA;cDfC}*Z>#iUZSE{;@R$6Ujv z#L0`x(^#u6WO?(AR~jM>o!mlg{*&$ZZLF1 ziUk36q-lcwp*4IUjmQ>;w<|-3{N|t_o_b*BxIYAWE_6K^QCqxsb}7TvWVlQu2YOhW zW&%QpP@b-MVp-n<*3#=mlNEMZlSaklMENhH+&}qI`&eTSD8`yN52-scCYMFhRgrdL z13(VQ=DnGO9%PVYGh%x27wp7G_}rMr@hW6#)?_O_2~H9NP)^eerle*p-j*lAnD%R8 za`)O~RP(a9YiLPTMcPm7zC(*XLtCODF>VJVOG=jru+Q<@SW+VCr_1lstoO&pLvR&^xSL{J?39YFWJP>0=AYUsaN$r`S`Y=mZfD;Oe`4PULXo!KuYo|` zbiw_Hb>~;tpDymRi->Wj%B+o)bf|Ir`dI4@RH?H3+zkwUVou9EG%Vl(MgYF*zv`E~ zLjy-CR+`|;7NFW(f|f(r`!1(}0D`kb2siPRN6ZfkOU2+crN)VuK~N_BC(mRxGfxG# zHq+&00L7_NMJ}K9@;s=s8$;ZZes~ujHn}{wu!cmOhe89dqz~mE>yY6u7t)_Jy+1^p zF}poh4jx9RM~m!WE3lzE{+k@GAqJd+CUrB*ePhYvroy*<*hzXNKTZBwav*;22^A^2 zOIsRRRKBD&mcc;%n~5Nx@L8_05G7OFbTzB+6f&~;-spn}Yrs<{(B<2bILZ>Xi+X7N z!~l_y1|-UcX4wu3t{A&C(hzs@B?Y+buB;)cY9fdsE6!MASi6txeGmJ>9he4o{^fr9UAlcSM?-l_!` zaj4&r%?Ags`2|ucs}bcEs}-OB)N>mTqnpXJL&FD6S@KR5it#JzOwK+rKaZ`btC)!d^V^F*vMnEU zjXkjs)R%cC{jo7n^Ia&XxZkR%36hZ$JPK*uMFu#5FoW`$TuO8X(?;O726$3tj4jc} zuuGdsEb_1&i`-NN&@UQVe?iSepWGyTTCgnbiWil#>u5CK6o-h@jJyNly2MAZ9ZguCd%50yZmWc4B`3k~%N{~NyW8!n|2pulHC z$zS+OvK@>B=rJ4%QezB#m6Pw`q!ol>dJKt<92X!g)&q)m1-Z$9hG^+ zGsrOIOx$Hju3L}vyQ*dre~&cqy@~qnsxaaLesQiS)j^1tBxZldR9N7tw782YO>RLo z%?WcROM8udEq<=8rU_d_puO>3LQUhJ#6O(E|m%cf-lK zxd{~lzlZg$;$;curwU|7`s4JrIRL>&oQLqUOkZ`DLInfFmAn*kO4lp>4h^{xu7n#C zBL)4XSp4aQV>B-*gE&{EcFf)8OYexNDyHpTZ>!rNOn|x;;JN;{jx>I&F3+b(;Hr=} z73T1qn?J9Yc4=Z8;0-_2LH!}mFnPg_O1ku;t_n_I5$oW@et*t2q=a&D$gKY`k!7BI zeB{r-TS58pvzU~q*;!+-5A*o&IX`F_J!-!JhWR}=P#e*q+aJ>o+O=zuo3-YZCzADrYN5*3h^f4M|ck#g{gI3w1vM23OCpVlEpiK zb4pAUS#Fs1ogH~t_FfMQ~xTdJ_oxr+VacLYMXH~}het#HWd z)kcJp7Nns;oX+~wF3IVk?Ool5ff5K!NTB*9;~(OiT!F)`W`kh@HnFZlBX%_jI(kv_ 
z-(32)d`CA$;XF{oNQ@JgSXwLhf)1vf_`$IWsI&~DA+qF@XHMI(>_m~|7oKzJEw=8O z@u9bKN(a_schkUU=6Yr#fVMOymH-9h=somTvL`(9upv|4I)06eS1+?P>sed=Rap?q z&kvNx{32KwLK0P7Agk0OH$0Uap>sl}#llL&%X-qlFW`BhQN&2^rV5W4C`2GvK(~9A zWy`Al1s~^wNN0fOEl&*&+l6o~j}`-EM^v8~w-y~~TXuOr{fQxwHbBk(uUMK7khRO1 zo-_f|4eTNd$f6tTc{WMS=uqIF?E2S zhGenZN3Xz)ZGJ?m%{P}=6bJxt7wo8;h6i!|Qm8)!vap>h(=7^L9*)%BbN%BCUzP@` zzTdu-dnI4lgCpL)p8o11kg8R_`OmHTNRf`!R~R1PBBwD{<;3M!xU(Zu&RvQ|M1UJjL%*R_DW!LO44qAs Sry^Lurp`4!RqBzuUG&{L3 zV43*uiH7YMSW2`8FM{bBjuOSEYrzpXDc?meV_S;n5%KrbDe!SNNFeKc@b?a`3Cxg2 z;9alIY{izQTSTcMR}<_Pp{eLjn^zGZ4|1A5zcr5+Na>-Ax6&}GxnmG)mptXih-tqc zKN$z&%>?SVJ~%yjo?Htr2&z^;dh#sP%?;GFj0wji%nvNuRMyEJYYS5?G}BG|lu7wb z7r%-0gF@^Z#di|Iucrr(MHMA$FHrH6h+kQ!fWMh*e<6&luwQwF50KK+hnWU&*jB;( zA7BVKH}2;6!$89DQ%Qo%GpQ^n{&#Zs*M~9Rvvp2YP-A=?*^+7c>S^e;1NmJ2ZL2Pl zRQLPIVdjjrS}0IJeKuy;>=%KLRvcm=w7j`g{We1FjY;2^^lbPxcWH|1JqIArDOf%pHctOr{`FT^7d9LuFFhx_?e;jCv?C z2Ic%N&i3wWPPw1}fbhmyylKD1g7yA;SC4VhzfI!|G@-XdKz7ou3!FFt8#^BNX5nR_ zLzig%?b}o05hj2oS671)RWo-~Npon7i8Ej@Du*8X5N5wev&ys;g#`=BCZp>ynu&6% znMDayP=I^(uWvJ}+`c`85G!YptHkU}Or4P^E=XsBr%AT=YL*`3IU!*Yk}@{l z$bkD0c-g0nDFMshX)KEm5=~PH?~Xu=GH7uz9V+J;$AZG;EP>FPD+xNjQw3Sh?Albs zr>{2sE)#18V>SGKa0IQZ(dh}<|4yW{<3)i87VPC=P{+NcYi!YB&@%>s|PKU49v zdVIBnoa9kK6&+|26c)xV8`2bMk)|}!??N;z>JQe1nzo(^PZQ}adgcJjdsE1xDx0ya7w7EgEf81$3nBNPWg_*1v z#QyiA6_#JmAC~v$!)U0l|6J z{FxgO37AAdn1dO%ql{@`(W|)ZT*tOcKPg(~kYuZk+B#|Y12({&U8%Okct9aFLv4<- z))avC!@#*zv%~CWe8NFLGJ1d0AQx!pXLqyR{mKKJ%aNOVkw|r)eUz%bubz+afMw(7 zc~%WO{}>8NsyO0(9wkaKY*0WQV1q+hr1pb~1@I1>>}Bmyo@)z@CJfxSFNKhJ1=j4)#~iK@=a15%P?ya~r)zw_r3W8WocBgn&OP=mpG0EhE6G zi$;%9b@$b1IWy&4*3tfgCx;f2DNTr#FraAfysQxq%v&kX#SDw2;^>}3k6F801BBM{_no@ry zZ^-^0qRW1Gb8UFQhft)DFSGG_caO!x8Z$HQ#XgPofO;lBnp~eKy>z`?Ovu2RkV?<_ z9^%9swLR4R`K;5gg+y6$LffLNmh^ug@-7o~AZh^BVZqD{9KnfcBL!~dy>^YJ2+(eS z+5733d9a;DI$tG(J@K}c?UuC>zG|uH&`NVmx-yrP??qU$NX8&gEnx4fKV^1k;H7T; zlc+FV5;vdT&xXPnmvDOYid;-TlaW>YOiK@!;GXl{RZpj|`=ebXm!s+8pTtVJX!d#g zh94f}3N+yLOsFys!=VC)04F*Z_`)jkOxleb@QXfE*gkgowk|sX%)wJ%eB=TYzH92* zw4;%w~e*gyRaBxJxd zH@wpa1U!mQ?Ne>SbDhTH;YT^=f5VZEZ^zo95tsOzRpWl&1{PZt=7W!eD>l(AFm1Fp zSe%qf)}Fnk4(vHr2}s)S=p?j5SKHGnjw)bYz$pLB^)QzD;h~v7O3|lcayQL>vrIcr zuDp2js3l*=W`~IhOHP8w#DPmK=1t~6@7kvCw%G)Q;{U%E0Js%H$>Qy4(R#GYSzXp4 zv!Os+|BHIphyJDy82(#U@w^Ua7Q!P+#WtqpGvYay-QdS|T%B*(#C+E^fbaz zG`q~FA}Vex(Wo`V69>&bC45=vPLkbNQbEX|pP{dun)EJ40YbAw(o&Cr!*pj-xq`fY zkU7V=S``h@WR>_!>0nh}Fpg7R-kBUN_7?+EPyqg^2-X=+CtBYFJpC(-EJa1xv^MYK zu;Ie^tqe>j3B8L&8b{boM*S@LQO}xuEUL#0gs0bXtWfwpZOXQ(r7EG$2$SZtB|L$F z$2ZZW>kUd4jz+&}-T_#^J${Z9*Gnf6iDQG&UE`ozIFf~_@+0GYSh2ns{dx7r#)eQIeF<)nkw*)ef24YwgVU5Aes@>s&D4+ZM3l;*(Y$(5bANd zU$WPJG}^MwPFz7fE60IimPj9sQuw>Q;_gyDc$$RCuW+6Z4>S&q>{N9sQZ!a|^8q%n zk-fTOjZQhMo*PjXrqk@WrrJ>VOk}d7+y~joN95WG&31*pn+C1)Oy^5!N=SS}^2fh3 zO5|~N)hsHpcSS%0MUajt215nfS%Hg*fbi20RUjeCUZMZ_T%aKf4qU)cN@{=G!(^sn zZ29e)6UR!8&0*4#eL|Yt#9e7yQE*NH0X@BDt33V&mDP(62DZi-VbV4>4CreeUC+Ot zY)W8yfrnUG=qf)5$3ugG+Xh|U28glnCcon<0(|#wRNH3k-^Tsv^Du298Hq{ZwC3fr zC&5 z#)#}Tj~d=SK@=KO8+Y)V4mhLRk@KBD?d=ZHYqkx&ZSWzpQa;Hq5SXFmw-%3Ryl~v) z>OJ-P@q26wma-Ihv5Y{8l^DHKjud>{zOeVY3+Xeht2iSfY5R0gm-SrhytQ@wvsrvr zXM{zQGr6+pvx9*%Il`#FUhvA*-K68nnjAUA=xV zNq-N_jVe2Q5QDuiHm&}y@p_g3yJ-zTm5w>vbUt8|^XI%TrJm7hMpBc_8X8e?+-r{4 zp+X(YJ;BRV0{P5+x1SWpl52ld4%omCDM|W4Jm}354SY1YO{s%dkF}0IUuK%1PZKa>^qC)_6(xCh|4X zMCl3)Kfm-ERr$z`j%{}sBKRH)Uue9=r=YU)QzW<$OX{AGG> zA)&{7mi{r8Ds}o>H4sRTLJYRbxC~810X4k%aC%z(-dkwQk{85;lDbM@=9UrV-*iG? 
zRS3Dg`tGXsT|uW;McK3EpSULE-df(R_Q2sK+Ip|xS3EKZ#4KLPAm!c)*;``@9s5vV zt}J+$a6LG@MFsVh3EltXRlsX~=ymMEz)N_}_d$KoZdIYXU9w$c&ZZEbvte2X%l;tl zQB%l2N?*-n^ZCq21O}M*Q!4(RSvU%9WDOI>crdx=4=l*eD4&3;jqo`$aN_*5^yl$W zo+|asVf&~80cVZMXTo}wK$K5A?_M)65i+&*yhfF}C+bZFaNLy6u zQj1F1;d>|)cLeP2@5F#`O62P7`qOKBkw|RdfD?61igrVOUHl$gY6i>Ew5vg)cTHX2 zg&**6+>hyGtuWhQPZGN_Vc#U49Tvd#A4AppdTx%uFNX4r_)*%{!x^62JKkOeGPt0{ zjr@-L)?_8KLWh@yS7DZq)h(pQPvZzPJlyz0+_j&P#Y^Sta#H?XrrxL{wuSDt`!x(o z$Fh=fW!}ihz4?}WK$im2hWUHv*Qk(sR0%x||8ScYYJlu!%Cy=Kc}f~+C;d9cNBi8c zE5yOik$k;`*A5FM65ReAhu_9abZ$9YRN~&18O=c3;7!}4HtijiWD1U+7G2cI$IAoZ z7~bt|c=qi{>r3HnJoKh_Wvk=OfX`9lL4pKSb_TltwMd5Vj)0*sW${Ek0+-aMC0c&x zr{@HmDu%xyIhy4gKufFNH8uHMcpJ7gA07NZ@3p^}X)2TEArd&iEUQC-J_kigq2Br5 zoykHu;oj91+U;in^CXXQ4D@_^N^vo2k1nFNET=Ms$oOt-gs+j%PUi6=-T6Dv(<^E~waMn(PZ*Xw?H~WH47No8GZ($0o_HnpP zfQR#?l;2R^CWB4Rl;g;MxJAT04Bo@`Y=OJjr@f@EU^j{0#=q>3YO*1{wbfbz{X1{f z*;~E?CoJh^*~v9oYheERx*Xm3)gRZj`Cm~)6i7Ye3jQza2RdfWO1Plesg0J`R<>%l zv8p5CYm~@%skvn~-Lp9BUOEoW-xB;QE_PV3Ifo;nQ!X}3;TvfFRoR23VH$#-qR~x! zO740|Cd1{uj)rg`Z6k0jqW9e0Xp8?sj}a8%x?O7Y-{c_k=(-Ic%HOz}VEIg7v9{J= z1&OV^u8OK@FAX)73Qat&vMQdHtCLbsEGhhFY&R0Smn?71%v9ug?v-fZn~6ipz=yDA zR~}CVm>E(8XWNMjb_R5hQgQA1<<#}y6|O31Klsz_)BX3jQhQIYvp(=sb9V9irwv{S?ieXa|X;!L_2wlbiP@$ zIB-?FE?38TDpyE4oQX-$e;|@yVVX~+ueK%ba@?hxDmoUSMXXhsR?D8>YH{QR30%QiYJz4 zj*cX2&{dWr?j=Xiw_Sb1C-;(pV2Frfp)(zgotR6Ig1$=9^;{cEXM;Wd_CFKJegr~y z?MFGUi~R|2uTZ-akn8erjvq$i-YB{5R1jg_alwWgU%R4~VSanngp7q=IZvHN+0pXT zBDX1`jizQS>FnfGtj%f5ZOPMOsgAlNMpc8?CCsOo1XF!s}n{Z)q&gj77=Ekaot~9qL{IvC+TDO5Yr|(sU9y5j0R*}TKVve{1~y{S5GaK z3X?W^@Hoi8?&4!S=ue!|na|yr+*F~V?e%Rm+aZbF>-6;Td3LlTQV zt|ybWc2xOp>v6i}+K#wqWmH_^-92aB*Oh4KB@4mu)4I3FRB=0Gq$T5??oaZtkc9#{ zQsZuVOW0e^#n#Be81JXZxt&&7yRE)0?y`R>($?X5I(=LMo*)EZN-oaJ7 z?_`Lnk$PmyIj{A~Js*hTLF3V*LbpZbdju~klLwmmQ_)sSi1l3=0zcD!?Kj&JXI28p zPySzOt0w-2wKTd7fUgt5_F1j>UIUk3P@D5bDmrKffGfV!84yxH&VVigPVusmtSrDr z;ig{yC8})i0p@Er&jzx*bn(cSlHysA=CxnkFy8Z5+a$x&(6-PgSdUCCt7(SFeNqJP zd{`($t2^%x((ybt!ZY+1t6$iL{p7*+fOx9~3Sig}pMYU+`kv*KK20b~p=6z3+);`f z15Ts@xz@NKWEIWG-*+T&c5M21&CQJ0-{6j^!NADfxQ#tH?vxZR9O&zwXZj-<~l zKd*h>XnHOg8y#fF!&|Y^i+>OQzfS;}oQ%l;teEyi+hmIQ2XGXyc1z*wcEf8cr;EA$ zzd5(Lne~blo0k@cG7Q?93%5YlTKc4;RNa!Ijr(e#`X&{;i!>tSm?alxpfX?NBA%1R zCIAO=+z}?mx(a4vehN>qpjwee1?7G{J$K<4(b+MQ6`lh>99TC~K%i#R(6XDZe!}L~db?zW|CF7B5R1U6c(~t!jFu08%yGW^b1{zwDFD!t=>0qKi8e^;{}J`x zfmHtQ|M-2(%np*hR|pZ3kv$Hj?2$4n*)vi&3Xy%R%v4nNifk%-CnF-OjAWF(f7j{t zd4GTBpK|W!c|Nb_H6GXXysqoM@56V!F;ADYj)f;Ci|s63K4p4BTzIdvAe5m1p82%c zuT1}Zvbc5QVu({a0xd9M#7ZsnN3h6hjSa>y}D9+vUudw(|*my^i;y=K++{8fd1#mZg37iR-2mOdyP z`O{~9dsEUIN*T7+W8|=-+rescNeY?rfIa`D@R!)WYfBpsIf4{&!qwfTV)4raN@{X9 zr^U3*N2&81O$A(L=F+5&qJ6wJs)?1~hCDR-vJOlnZmB!XUUxO*uW`2z9~`_g zYxjtg`|kFN+{DpQ2UqEJA)9P-`NPKt-9d_6WQd;ks_VfFMMX~aBVgtA3mpxY`1bc? 
zx9Z@aycqH{Nn2|7LuZ8BkU^WI=ka9T&I|>4J&C{xKkP(~H~SxQc;7j>JB?ArTztcj zG)}3Yrt*d5$;TQgyolP?UcC%Uu=UPGOv}%J-l}Mi*-vr4x|tjcgI>yssc+6%w7jMy zeg0+KKQRAS2OYUTiR`Njj7z_HiT`Q8dO~7=`hXO0;J$g;aR?)#HZgJhyNx}MA=#np zUcZIOr*D<-Hg>`rB5A9gG&3VcD7}6;-E3%lnxR){bd=I2wLj(WFt@JH=%JtGv$xRd z!VmM79m;cuWwE$=Nl6TdNqmwodp6U5&mD)Ly?eAf-b){t;6$%SdeXvqVC*xv z2}_?0Y0JRF8pRlZ&W`f?#QBWG0+S|39c}N{+PCc#G(Nl#5Er3GO5lZ=G?F*jYcGB4 zj*cao@tPDGeOjRm)g`|VCILTR1^zP9PQG!f2g5J*?vATYOa>E^-aUBsx5ANmeXhZm z`yu&X8Ou^E2geDw9i~g4w0whFKo}HlK5sPY_=$;gN=N!zv>vfMLi&Y2Hk~XSYE1Up zwYC;nN&WR&-$OswTW(9gy2#)Qap6mnH?50wskPfILUn5goo`AA5M+ZBh_j8f<&Do; zM)Opf%aorB{6$(!LyW!oH^;K>@I7Ji_u)d7sM&Uzr7!9w#;KMC3%?)UH)=Et*j2yc zf7q3$d*$;&SjdeF$FK77UgF{@Pwv}xT~9R6AIg>Cwe*4nX&6dLtjyRlcCeDmhVVMS zYrQhUSK(}EaoikC^4{5bNu1=xP|D&<03!0q_l1uf)I$!h z-rYKSU~ku4yu@9e{D0G_SX{UjIZLh68B1crU6A;OV*O60ye5d^bN%rT&AR^WQ|;A>yl9pe{(4mV4~S4%$^P@t18W>q*V}U% z?u;k;4C#+X>K|5Dvc|8H%APlfeLujRzMAiM5caxp(@1LWw7p?-+UK5N1gT;LUGXT- z7HP&lTU)AF9Qgfv`{7HZe`(J0{^p&H%JB5)B_TP9s+qR-=%~EFt{s*}FygL8YJR6A482VxdMEJ+U$EbUduU8&tJM`#rp6eC9cR;u350sgw14;qL=_crAWbUM|=Xryae- z=XqGzxGGJUktUp5jf=xahE-V~&&N3KrCH{ng5G0tG^+au5DQv{u!WuAY?d&T_~(B8 zqjYLkt40N+^iR+6?3?`Ih20Bscf*4vX5pF2Ogyvi8uT7RiX~Gle|p#C`+_7T9PbN5 zG&%Y|#>aedYvQ}?v}+oT7Zl`x2xwDYWT$mUV{FOuy6Y5Qnp9;7#v8| z52;5dYAlamFBNrAI%R;%oVz`^J(u=#6q15i8M4$&|DE@W+fVJNwQ=Ga;a(2vjdJh2 zBU+1Y9Fm{DqS!xfqrMp`_8>>df-%qIM@jF*q0#+_2ua40@ug7*?EmPF9J&)ZRLC3j z?Cpb|nedPhLZ^CC&U-48#Tld9YmGR&C^*+k2=CR84;M05-(Jl-O5zAM7aBBlmAqAK z7($_HbkpDL%j8~-6T1IeQ{w)rNjw2ErGKVbeOg6yIZjfUHjGrx_TwBwJ4%UlH!Ah` zE5||WE8I<-I3AI9<%Z1@JE;=$G9RsVcDW}}vkh)2oSKHT&tz0U@P8y0b>&FdehMG7 zQp9lb7i=!A-~2P^6HcIHLENLU+kCQNeRX)mP% zM7~3T44J~6Y4)yecroE)=RtP?l9Mk-HgB;IfxP79X;wGiz;$^73wenQKLFlpXJeE)lH-6rb|5vzKu?d z1Dp_ZIID0KMcneB3pczc$s?%D9QL@WSoHL~Wlb6{(wt!;IQ;zhFGs)#qh~utbdst) z(rKWna@pizH+Y5Jhs#gZWuzR0RsyvnVieA@>`0P8#hV3AqBf%+mM>&`rWJFOHLDp6 z+_iybjXLeSe=0~D_MR*rHQWm9y~3B+F4|b@P|jqZYiLqm`p~*Z`WKwm%p>~b`IZdu zryu(Z`%h*}EUn*&RItj@D@iS}JT5D()A#r=L#_TuO)p$FOoMke9Mh zJ2BK@bFPd{q$1hifHsm2Gk65u?3tMuym!IwKp~M;-@XF9!=Ajq{X}bQM z<#EZ_nUT!u@$LAOND0-)$*Y*J>vGwrg`#fxXi+&D{oGc_Y4N`*yunO#5^@!~AkSO? 
zhtkU~HtEtTmnA=C1-vwU7*78IHC0&P989&^{<8tx=kQc?SMf=c+y~}D!#||N8ddYC zROz=A3qJ<=le3(f29k#9)H6jBS6CVq64~lgc+qrx9Y;*!NRzre&y^c1X(`jzR4$YE zI#!2R8veGV{_1k-wJJ2So;2v>_*$HGQ@}avM&#$U=328QCd7>fSkXIq{}RH0K`S#zYv1$L;g+jw>7*r@ z$ztw&7{`cA;orni@iF&T|5rJ&z09=BXHu?MZft7ze9^bw^`8E_pJ`})Y;bBi$=X75 z_QLn9&-Ei{4Tg&cQLZAj`>q!zJqC&K_pzl`w%tH0+@{34JY+pI+Vg8*dffE z$FFbl_dbj;-^^0UE&W~NwtIM0uyIvz_ooDx=!2j#=CM*$=osKCTi`Ap_bhh$67%r| zv*T}G5t4t5n7F&Ybd0L+HD@_H)Mela49JgXV;X*)FKR*2Fl?V^?s{{}M+j18i@SGz zChHLS{-j8Fq@h}79PX5)RHG)FY#_oBH*_{q{~wvAJk(7Z4yI-rpHNB^1CJm7n%w)u zX0SwM`O`sFnepZxmCvt)r+tb7u6Omy&IXp!CXSjJ&Cip)jBpw&7nUV;bjF<~MC#bU z_TyQ`ysx|LI1I-toej1wZ*bcyRpcf^OpliJ^$(XVC5wbFY<@yrh`2uiI!W$9(e z>7bF40;PcvPfS#txZ>|tDdvXG#ID~s2UYq^e~q7?o}7|S%dRGmQqhaz{^DEPG1YrR zPu4;;0)>2!zIwN&B#`TF7$cgn!H%7i1gUy?O@H%I?Sr;<%uxx8iR)h8u$Tg~CE(wF`^^dH7Shf zC;QsM85{LBQ0FL-=eZxtzosVWReygyeF3v!Kf)4S}aHzUJSTg z;W_`6%RA~(=o@UsW=&^wVLZd8(B<;|r#Y>dXC3*04^M(tR%LN$5=}JU=KTgeTA%!* zc=pqp5ky$Ood4mn{^VL}gHS~T1-fPaIRJgYJuTz z5ZC)%34|nCC4*M>X{TCEVH|tdC0bfbYtv*-r(0jSUklq8*7s4V-`f(CP74yk7qvC& z=uiIsG7ujlXeWE?y5ZcM(e+N4mWi1?thYX?Vq^eNoF=d4 z^LI3d?DsI4@ufz&$Q$x3`k7NAYsRN$YT7o-{7#BqDlkY&g_a0_J1k#>Vr=#Vs+;#ry8b z=DF9crN^nKkSwX(pn#l8ml6HVk5oiY#n=XC;uWjx^MtTewst*eaRi+C$EyqrEcf$)iE|fW(NoLT3s<}bme8ooYZVveBy0;ma2x4n<{|u&AS>BYW1j75^|3YAbA&*BwX4 zSWBl!qXM50>spIUL?)H!zQ1%I@{~x*r-L0l(cRSsHx~yUL{wR>di3PHF}*>^#0rlyHBy4HCloE zswWUEq>bKB{|+m?CwGyPn)&;TYb*isZrD23E8x1;YMd1&YWYJ7nwE} zextYAd(;pwnO3xG=(5#sqWw-3Q36_DNDyAv4CVh`5g{dlJ~>CkgeZ08l}*zKq}6jN zNEc;UB>2po%Jm8}$!#w#F6fVUkE)!vINVveNc8*x#> zR5~xwq)4n9my2!4776iI`9?pQbogr^Plo^*Vvm{Vy!i9>UP9*Ays=S15jNi>L)mO& zpbwA1&f`a~i~Acr zUI;8P`#-!B%5A}NaFCf!65lnMC)2yT%Ho)4FIlq{&{cYyLWCUgUH_;=#PaEiU54Rp z16_T^yPPCK6aws-iW9Ap?5Ts>)nZ8$CW*yIqWU1dW@#7L`G`$9yx4#Se^LZ~$FLCX=Rebo+@|wBJDZ_y`=gYii zvza4b@q;(7wQZniGa_ya;@js=qDjd>I zx>$kBOy9CA^xC|pkGb^srj%HTU*Xh}qo$+Kh2%c@=^8y>?dZr+)h+_$dtCs{2a@2- z_LR%Fby~Fvvx6AlK82)V7FC05&Q)5~duK6{Ye5u?PN$* zR9xbaj5sIBnI`BT_qO^zE(NbqJ%fMCGh1!@UMTPD90CNpwIv=2&(DZBhj98q^k&K} zb?*CxhnrCOaqoY3G|!iaf%fr_Pvcsf()91XeocV9e4=9Ugirsl*7LDkU?>E+ExW33 z2pU_j@F4G=+_5_yh6+19;thRRqARrs0*5Z=U=(HqIwP|5EoJ9uprh}~5c2~ZBi3GA zcOYBCRXdjGpJ~%rU<%GgFmA~9yyr7I0gJx2$>wQB)FKkaiHj9=Xb}W66Q80Rn!)^= zgRws7vs+lmwP_QPB?#f<#St3YLY(zucpV&;SEJcGA+6h~cf)lTvhLJ>zx1(F zKPE9SKxO$e<5GA?I6Wd~uiplL%?pR54JqI5_J?7t{p$rnn=^((p$&YLh<)1Y#2dUM zoR4ns*gi`o$74j%m*lBPJArnTLf~tiKJ7ec1$t*R1<$oXg|XV+g3d-n>(q=ae;kuG zrE|4^ST{|?_t)Z<370cL;|o=ot_#4j78nycLfG$_!Ro9GyFl66ML&=>q!v9ar%dv} z**pKdJ_>t@i;wL#0{UV>CeY!-lFz|I);z6Is+Un*dWjWzS1o4!nDcXi_%erY?YZB3 z4@8Aa<*AUV1NER2vds>r!wt!t{rf&o%@g9TqLKcan`1UI7uE@C^$0)5pBsq!zlm|e z;S)I6b;a}KigH+cdBiD+%$kRh$CmP9*3a`=`W)bIdCA?`;yNR%(u$8{)Mc7Keq7XS zTzZg00F8QB-E);oCZDvlYzpmx=`ipY+cT7{{(Cy2nWk+aDaYEtQfL_xc*&tq+>4H_cI&z~oEUveF6KOzv%nm60eD-a?= zAxj;ScHOKO5!Vex-cR#b1bgBI`u!0O#~%igceL+TkZ>AAU{t4cp;HjAY}XaOpZv7& zJ7hRgW!%qad_0>cJd?%m=2yzh$`?KHL`VS#51mZwAm(McM0Eh)d8S7T#!kbqaWpJ` zCXw08=l4OWHK;+iu7c$35Cb!aVku8FJU5r^HY<-(pSWS&`x6Lu2W8e_??#TU+guRJ8xwTIY{Q+ZAroje``?`7Arr@)OAL z;i0kj6B_r7)pNB)5QVEHmrm~r=KiC{u<_A};fe>biNBqHU^5%Vy?*m(B(bO>rmLAD zgOq%ib{0y0&ynRk(LC|8>n3HQwoz-Sw#n!_HU!OfYGzoETVbIcC+_$B&B5arslT6* zBTLX0z zNOUf_D~E8(o@p+8o^E$Zl`<^I0QU^C`D{}|r|Vlh_Zct*?sxc%9EusH>iB~Yg!{3| z_}*gt?-(?p-|=g=>7^HziWq>IEZc$~qycu+Ba9<I=zi^4^x3?L7q^rr6-u<@kGvM-XIIonO$s@>JA=u4nsWe=Lhhv;6 zPcNhGPAog3#N~2B<%%8Mr}*-3&=_PD4#S*?CSwL~>J?>QdB==kRmNq{=}X_G1=tG< z`hwF2dqreB%yadNH5X&)aQD7={9EMJRbxd>%7yzHfx?S2%6Njx97?VWA)JqZ2?j;O zU&U>diYXGly8i6OfkrGyPHw;QswygIy(@(v@YZCJ{L{TjX*W}1UZD`BHZzvo90$6x zd!&UV&^q~^TH1LmjhDDt`ITE0lVp+#n3rzZ3%gx35uhDZ$&|Q?dMqv^va&Y*Iphsu 
ze3obk$8zJqtK;rnw>|u|x9b5iKTbzR%k>vjQ3< z5NtFBA3!krLSljr7qzoYBCj))HIb02Egh|z495O-SN7pcuGvuZ#gE)=?q}+NNXhFd zXU*kiX$Rlg2FI`@LgZSQqax2;I`OQL#C6RWzs(%J|Hj{k@hNd!(7Om4JNT;(m;lI7 zbQsBdNm_l?2irBT-C**XffC7R3#+1r%*+`n87=h0@X0^SdUcvs<^L$Dym=$aQKPc% z4&3QL$bKORDX~^@geD2WaHr?Ai|CuryeOVYB1$JfhbAtpH}NPE&|uaMZ#vKz?1(4U?SsAc4}M-UpTdQQal+ry%#y$ zgzG4TX8Af=xpu#GOoMX4J0~PL2KqNBU^vUiWJ8`%YVQ>ba`%b17UAQAhCIViwFm>; zvapJ8#@-xpI#mZ%LVaUXW_9xDRNu(nR|8EQ1HxHuNxXWXg%YpgxvLdAu?OD**z&=SyCoI^-wBLw54@>(?dkbOE1jPHmP-Cm5$Kw=&0{mW4u=wFJkqHNaAl8s?;9^ zuc1cN7(qG2jcbw^`;D> z?W}Q?N8Q^esKZR_P4FSp(AU__3h|$5yBrYvwgtLQ(Y5g2bEN zUNiy1Sq7Ieuk~vaPxUWFcAH@$KmcKn?TRcrEEJW!QXC33-~iVC5zO-_mdFhWeB{(GjZGJafyTKH)wQLV&&cQ(6%cF4MfJ^0F0#Mj$ce@bjMnZwepo7oT^g=Yv|!Z{Z`JFh*cotk#@FHkR=OZ%8u?knW75dqW2J zefQMNnAMWE;wF>6((3s)d_6dD%U{pjnLjOdpwfOP7{S`VK#x!lAIWoNjWF3-(#b*O zIODa<^Qg!`FnMx})MRHkkPtzmV4hiNZl%8Rxoo@J5cQ!WbUh^C9s$lUQG@MNJZ<+l z9-kPqIZu-{LL9Jb!8myO-{xJoxh^Pk6)o-YD7$@V?D=*?g($s;%oIBzFv;YZVx6I2 zx{ad*qI9+@HzC~+@FJx`+wj+q|F_wuZ@dyrd=lV`xf@pLKwN(TGxhFS=_m?vk~|tY zCPX+-;|~3`=gNpatDjE#%Mkb(M}csf!>G?Yy>c#G#2d1FA(yF|^*@DX~m#%+7k#TQ(#a74sz1Zk65D98hZ zH*U_<6(+NBbXPRv`~ch?ip+4QbP1-|660LTOojeOV&YKk379UGg{^b|g9Dkcw5-=f zG~8ySA^j&YM<05cta-VHq1`V%djphsHx*?iX5e=IH$LJ3kvb-G5^U5sEPCbMOtHoz zTnQI5RSX(I6oMd00>g`p&zgwVixWb$jL@iZxloa`O^Vyuogk?1Z8!qk_+z~=>m5HD z5~8Df^Uy%elbss zKXix1sqyEi@vEb6WLp&8_`Ad(Y3LkrbPDhMQW+=s$=w$=zl=NR4bcAMgq&p7?=`ft zn({l14*V^H;6tNE25Q^U3V}&otnuA=Ry3>M`|{Rf)|Tiq-^bV>I|Tb(ndYu@5gpG= z|7_$(6T4-ae}g8!R_#f{9h=E-M_8OX!7VaD@WOwEXu*4EU@_P4zC$W6+~6?%`ncy%P{ldmh$>s1Kb!Eq16NEL9Mf z1I&bbM!0g-8{8H7$WvgtQk#t6f)^A~L(gS#jAQ?(lX&$U?pfPt*u|?hf-A7)wLL)~ zl5t55f1kw2-YqY=T`rPdj4U)N;G}P{Hknn?Y1ck5}ONMRK!gI+W zF#{1DX^Xi#1iG64+SLi&cBzZJhTC=FNQBI9*6-z&X~t;$u;-Dd8*muqwjh#_MOQf5_1i2Ba=^cnt7B89qFzofB;P!ox_wEiDoRji6ov` z$Z+@eQmX6`ng{-f{N?Z&d#3rt_I+aqG%z1N%gS8TO+R55_&>1HU=LFsM*RX_a81X3-~BZ) z_eOlaA0MpLC+644wxwjgJvX3b!yCojNf2a3Mokni{-KqE@$sj*ov^V-` z?0bdTW9LWW9d@UA)nbW~lsI4LvES98^SdoO-?@F1smL4jF}P^u*UE$Z@5P>TtQ`Aa z+vcx~6s{f)Qz3{XNko;b*(+3Om@37YZZ=V|UiLG3|82X~ipE4h*clxmgQpuo2E)}Y zGARG|*IB}dDnF*v|6CV>yybvtnDW_6Ax6M|lrl`4hdQ(`VnV@K@t?P(5mkFR%>W+% zKHfHfnEyTnD3~Mq4S%i*pdpr?g7*pzg>b?=ZVxL2{wcsM0*tL49PiKKMT=NT4cWbm z0XqR|Zq(F@X;ryAVvx$BH{?s~fXgK|LD^&yaBx2D?v1(Uq{Y}~OY_8dN^uv8mWXgh zegSptzv=UJroO&|LHH??^7w5zxF9iLLu10==$v%ho`&AG3a=hnDEvQb6lW8ESrO1N zm^8oeCeio~?infZ7$!33{O>nnMgMkvExAeRpot8LZPw4dDN2T)dGNz9YcXsL+ss{b zGmWpM9ees(r7<7ShZIC6I%Q$2$q`Q2NWI4lle<0DhxlW-QbN$G9j@+jg&2_!f;~>Y z96R&h0wrq7*D7D}T=*mH2M7Ms?wd1(x%gF*y3O;=oqP4ckCukzkKA)t2LLhE>~ONI zbe!c&I|F)$RyaHJYc>wQu7IsYn|FLSFZ`bd$h!1qDf}B=HHSkgo|@&K<%92EK2f*u zw*bjuH|??$N`K{mekyI0;CC5@7G@Ie>UxE3e`qIl1$rP2Ou>O7YGH968n=b{uv%6i zSYMKVjQwnrc|gi}@Q|q-${fD=QzgK@0wq z^~;v?#)1+^9HbUmnGocvHC|`XBJw=cVbM)w-H?C&_uZ6Q8%gv@1g%@tOz*CwM%3U~ zjTc&Eyfflk87!_1`lQm)F7XNV*~+;>whvTrje-&{_|G#>IdrgECk`CW9euqr4r1LQ z{*UmqNgL2OBy0}4CfnqstH7Q0v%gZn%T3KQmJoX<;(C|!Rrm`Gi1vRLVMPTybbDz_ z_#VP7q~LNQ5rVz*+=;NMyoHYs-`QB=WNcg~4x~}S_E=-4ckthF8*9wMTJmfHV@R|B z|0_l&gwu|6-n8|{H)&>V*gPNJAlAI_KRNztbzg%O3BDpfI#;>$&raZw9m;JX-J?ozxPp=yUg*~*n zGBm0K(){wzfWxAL#eZ=Tq63CW?%jLXcLsFgY8&2kIc48$VLHmf2)X~bQ%PR%35-I6 zU~T`&g-zTskq0zHNfK|o7U7lb0x7FLf+)n=M!w_SXHXK(RL~WfBD=ElDVM$Z;L5bD zJjnSYg1?{uZd$yu%=E6~>weUg<2;p8o-$mIC?pY8d-1X#wdtw)N><0<6)_@9BPj z=_}_n5z2Le5bM@%YIfRP=@g?U1r366jg_}B{@N#? 
z5~I8Al zj{=Cd>fs9v!GoDnGXPs(g0C(f*lb_dLzP|g**BP$J36Q+lk4k3mM_=^Tq-RK_m1+d zYhexxl>h`5_{d#EE!|)D-_-}pkw_SmB0EsKEyB=++gWMEK(;@43h}E}q4Y z>2tzJAk%m3IPgrcQ)NrC=t^igKhy8=DSK2fg)IT-m8l3`Qn(CFuHhwN`Axm>Tb`1iqTSEG<F$b9w?MoG2g%Z+ba)WuJIyozKo9Rtlj~ zVl+DQPaX)O)IQRH`RU4ji$j`nvprQ6I=oy-`G6_aH)WHTzPpP9sxv}Y;D2-QEj3?# zuj_OkKY&Sa$#k9@K*}wKH&c)bk>SE?VMnJ+W!YoULGQSdGL{ZRZR$rSnG0azRQ4o2 z@7tKTIbPxfrHI3y+nCo8)JTb_ye(A>DZ<^J@%2W!s9E`@!dveJx~x$AbU1_!uTUgVjnGILmrH3wUA(A~!9oEc$YOaoh^*y5#nZF*7$FLOj7qTXoc# zKKSnbWjqs}OVg(HG#;3eb+IJuI(z0JHpwokUMuu^_CK{U1$gZ$=M0~3enGEbIRLoX z^lN>F>5b3QCX+Vkd$uO;AC(J8lVEAs_3tLjk*3Wrq;i@?^pO(d_0W0W+)J{^G(&rU zj(!v^f;F(Fd4KkvIet~dIC*|p@-kSJfKkx8GsZ|81hSi<{ODGRpOS?ixEo`WF&}R} zChS^%%WoQ{6_8sxb41mxbJ3pft2S6g@@%JGiliJQxd6lSx;n=|*McYQ1H2^H^2fyq z5g9(9JN)*tf(=A~hx-?LyKZ%_Lonp%)}A=ljs#n}wVk|p0q*E>ZsJisq`q{}vQN<1 zoh)LGOPmnReF8RtT&2>opd>|1nL)3A(I=n6`!_7FV|3@=1AC-t0Q~(mG3eN~i{v6k z3g!~#29PkKecL}@nUN-aO?g`6t;tBgI?H+IPxpaDAq;P%=d`+@3Sg^Oa%5gGEKLl+ z>|H)=;hf_P1~W8G;!+uyw^{-I+fC(G_k;#s!E_G3bE2?*W{a0G;MlQ7@o}Za0)*(G zllf#T>yCBpP+NhX<)mWH4jihmT}u^*k6Ddl1V*g^jkfz$EpL;-79@Zfw;TA`JD(a~ zQr*zcob_c@MXWY&Q{8+@7|h|SLyQcD#4r4eZXG^mz#j&d7-a%wQkLiQAbjZWY;BRC zMy`><96ucA1aLof7(}`{EzYE%v=OCQKH3|i^jNL00s-ly$Y7BAxCC3c00BE1o|5IA zn0+M}!M1u#WXn}e*H9&ZXB-#GRU-ukfw~kaI5!ZmPdrK*&b=(HPJ-BP+^@&oD&7(f zMm`oBKC5aWxdaSq!okqWKJy|K&HM;g8IkFG8*clj5Xqf2A;)5V2vZDr-YcHoxupv_ zrGxjXWfcr#x>pBh0CKUR9#P#IctrI;ryM1>55+g{IO{A^FuE;?XO6}9t z7JQ@uhne@Z!$+4baM7oeyQ!&bO1)CS+&o(U)I|qTh~^NX142d#j~RD{E`o~x`oQf% zY(d9*DnjO6mDBuUaU|bge2h*t-9cg!i#pLb`scwa<$*^WSc@OnW6;MRj}3lL_y3CY zO3V%beyjQ#&q@5-kpS%Iob)}*QFA~ycMU;|C>N5YSDrHZ?=1D2ZL5SkG+8RE#Vo~X ze-0& z4Ks8P0NPKIB5zqDsuK6b$@+g4c-1EYvNS#IU5e`Pd46A(3wxsbQtQLd+UEf}2P4$S zpWeBB7)Fj=Om2qrNr^;Jh2KRc?3r{m;%!F!LFVj|9FZ`EeNToVVmDUPJ2ur)j`v8r z@{Y^GcnkjhwSG3B&=dreb@S|uvHjn6wwPE~h@cs*8dt8)pS0SYJC<$|nq$NW(9lC}Sa2QoklK$o`Eg?7u zmav|kVYbB)VF%}2R{mZV(jY_LCX3%-DXNR6RwBAoI1m#lc@=-F8cflh{KwW1$+i4Lq|6j%f<-hih@sEjh!q)(NuEHakDo?vR$Yi%P%rW+bpk@S+G@@%g zJ6bH8Os>RhN(*@ZRNdn7!NE#ko<7Pq^_@R7A_I6BP})jw=?j6Uxpj%>ZZI&9z9)d! 
zzMr9dGaQkByxWJ9T?98QTm9ZOivS%k3xWB`pC5Msnh@hh>K3*FcLTTxGi?$i=jz$H zkf{V+9&mL^jvxS;2Jt;!CamQzx#g+5Maecyq=Hsm9Q$hKfdK7}aC+Sw}V{4tsj`$oJ|_U1=EB82t1ctPQ(HL9ovz%m#XI zmBVedCI%jtPTm_OeWmIT~zwe(Mx~{d*|z^ ztMB+a5Tx(a)j10nZDQ^Zz3fMN)#lHpm>^o9x1|}m)_Ngmmz@oJ=hx)Hqjk2n*)2oF zH#O)>vEnzzWHZVWjZ7GNKo#bo>Eahoh5D&S{4kcBb^ft2zcLxJ!`C?DucjYKtJKc) zpw<_)e*4tC%gFDq{KGjy(R)DVxeiDGaKuGwDbX9K55; zRVoM$K`N_nvx)Ir3Q@OsW!7kz&ao??g7ght^lHh6ARCv>0%F0g#cY+m*O66$n$0V) z^AgUtyYZSU4})o&+^J<+kEq~Q*e7vyQQg)`JBbvp-Vf%5Kx~Uh{;qpQ1U(0(9hciG z15^*&cJV$HwZSKW^xZ1Bf07<~x8dIZ)iOCDn+Uyka)}K`4JC+vNP!vu z)VUtQX}nLwg`MrJ^j@xl6pHy5Vh!nlO5T{$N>zz-=PlnF&1@U|-|!$yk>R;F+tJqu zWEvi>-VanEMhee-t3K~KF0;;nMn#-|(khgr$pDsW&r9;bsLbuhv=BXZ_Izu#>_|0_ zCK*!5KO(hm(?+Hb{4C~HxCS9m)@SgBnR#*d!zi2F8R`1A(+@R(1J$3NZ=A*)pi#kx zkZnk$xc$g*!Q{V!bEPz}D^+j@$(PJ9uIxPrK*yu60!`{&~*v zlF030t4ufuqA`mIYaDlX@^4x+o$DJSw1=^0PK#~%P^Ww!I37vXh$z-bhQJbpGkjF)@jvt|2!iqCp{wb#Tnt08kYWm>i0&hv$A$ z+&jgGjo0qTE=~}genyl<_NH*E5Vv;%h)>AA57uYwEy?@gSJ`l}QI{v=*wWM8ZC8}G z-A2A)P+4Tlu?gIGUYZ>FP8nCn6C)*g;H*v%+YZu1EcL+7F4vHVE`{-p8Q>_Tux{67 zDt+F`c!zQH!z~C;)yK!4C3W}MA-Vzbe`lN@D||A!0q(trJE^^)SH?w=53(qZ^ej6@ zI^e_eGIwxRn`U=;PjnxH!g#dGsJfgz6o4fp&tpFe^KI%AoY^4Ulm2F3Z*?@6(oK*}QC zECSBIdcdB})_83Fe=AhbYvtikG8JkyzSezfpJVh0lz5r;Pj`4+7eWW*;L{H!{9q4- znDDOeL44Ys&$worC9)ATUu*b9iMg{1aDK*~D?eg)^j|SYAq7wtaNJYph$K?}*#$)N zr!3Z%-1(9i2t?`)R8RlC5`H-_8cd?+iu^`KbvS8*I#6#8C&SRrknquP)%UA$cDbmt z7+nSbJJk%9y+(Wpx1`0W2~+qt{h+mSznRbC;CQDC!YbzQKGHPx$IH=C@uVFQZ6u7~ zLXt%5AJOE&t5lpD(b}?V5CWP*fIeK1Aw9gpMO0#UsC#v?Vud()3rC>z)tmaJ{IgE# zH76ZT@<*M?PS^H`xDdoKTzxz>aN1<`WZpUl{Kl(8eg09j-nP#f6z9gnWAEXya!~Nh zjI@A)b8>&K#hrr3lLCZCB|eQ$47XKGdqH$9S+q^Z;zx@o3-EsM&Xz`Pi*v5hsmmB< z$ZuN}!hwc^9^+|;B#EXhHp)+W_SOobI16mTE2*N>>|cS18fGVd$yF2shL#Ki>ti0b zK1*vy2Knawax+TR0$2o*V7=>o7hQF3?}=u?c;cSOZbusq&_>57^>Nt~4mti61E(6B zP~O_N_lp;P*G{qh@s(>6MPAVAcwclmTYp1eKnmNRHqzW29L-9m1&_CJOz$pw?BQM} zt5A7Inbz`1$-gYOyQ!#h>q{>eG4>i;x?k^DX8YDQMq&_A5iRlQI_bg>SfBt{pm-iv z#mfHH=ia@IhXRQYROLtGvPiJ~l!gJDM*Jr-HNgh*a(16|Q5g8(!By0ghiv#s#g?_> zaI_;fZ>iTzOf8Gx96(@GExUL%g}Sd!3<- z9|s@yR`uDW1z#iiiA11oGwJSRjd051gc>YW;Le`l-U!aC$q$rDH!o*#Y-Xdu3{yUc z{7UZLvW&gU(f|pO;WY6YW!ktx0&-QW5=bI>l5fFyyXHAM-V{(8v>R@gi>TVX;GFXbG)EwAu~rB)hk`CQQ$`9Y$g#V( z_D(JD0U_k;&KT>I)3sE`k%Qw{0%exX=UuLEshn~I(nOwjZyY*sPk8bP;(t_mO#H`i zqy+0^kFrR2Vezw;nE0kli8yk_k1pgG5PsvO zx<+`SQTwjn@l5y8Dg+!Q-%XoCuxu>CX1PjaR->1>C-w(?^VZ4r5mSnAukBlZ4BJBR zFy1dbEadq_heGo;2DHxg?2Jm2>4W#BTgTT3OfLy9YgIV=UFZJt9ztg0>sG&i29+}4 z%SCOsdagjc9gK>oLZ6{;Wj9No2qfjhCStN{ix(HZc^h$0B)=xX4&geT$74I4WmNS; znU!{oo*IzOzhOUO2g9&#;ua-xNZi%Vk*Pi-m_<^;J?fjcv|Or|blnhDkm_4D8)_;M zr~vGHyI1j1t@@lacoQYYk}!%G(#U2b_omK_#ATV;#E#38w;_gkclq|2qz;#_+;_v6 zm559gYzX~vP!1`vqG34Q;2Go4yG@2Fu#g}6Lw+IYQVcD^+@n)DiPouXOrTYgG}JcG zoaf&f*M_ju6jliT`0+#aZpxFkmgQU}o{b9c=ZBVbDVJRIi2JV%+duK{HpAC+)(-Vw z=%)(dpzGx{u+j1`;EcyGQLud7#ZA$(b zl$vUejFgoB@0I6Zv*^4O9MQD6;{Mkk-5ZbFc+DITs?T&j27xP#t`J!isPQ-E78mY! zv*A1jp_d80Q_+^6xXTRsXZ?{|RPAA-onytvY&-t9D9O;$t=}s^=;iF%+P5gHQNsAN zy5Y@%;v~Dbkc>uY&T&AA>coTv=8>D(v+w)WyB(y8%4AvA76M84pNltE!t)4{c)nC# zOj*5j$5fv~3TyxN)SKJ<8UZ_WU?Lww)h*hI0&a<}3701z%z3XKs7`+gH{^K#mf&rg zd;L;|>CAiFCqdcKot_4?!G9D_t>4$gCyklm+299Mgx8DY_l3`99~)qiFWVPro6L-*5E6g8x5f^Oo|(;-3d?clcN4L#UKyTMFrppW6A}WX1B8 z4pU!ud=6+SL0-1i!1sz3hbTGtyfKQM^6aclHa z+jdidMoFr4kqnbu0^@k0z`>_1(-XaX@!W+!Q(TxN<|YTqU%hRS?iX|b@7J6bE2r6R zs3zCrY>CiDSDF=jx z*4~_Woe7FZA0yQ*!pxQeTgImL6AzCuz+^Ym+WCc5`;2=ZIC#F)pnW*FpLl+A;CeFjyK*;Ms>Z6>9h_mfDTQUo~7y$8}3e#CrO`>5^! 
zZ})q@n|dbsV6oq@m_?59j@FRaA)E(>h8j0pYiDw|*k_;=7`#VFIH~Z@QB`6LY&gG@<*VOpGF@&jN*^f{%=|Sre$lIA3DRkiRJR}f z{r#K<1Vnsx=B4H`#lu|Rzr+j} zKNOf0*+J(4#J)>ug$Dyu7uvsI5LSc-R6bI1GnJ&-FhL>DtV z?rpiQGP)^VrsDK-LU&(byVUHNfI>c}Dr3-WKWCbg9fv+1CLqIW@)LhV7uail`!9}% z-t<6vTB35kjqUd}>LuMj5yolxw~>E>J{F{r%alO$Sx-=x9g|&C9K-j`=jr*cU1SoGstdp6$*ae_m+g4Zf9+y-xuOYL{ZTaCcN?8IfMKO|A?RUnue$ z?Rvu&ZsZj1P!eSIg*0Z6^mlK6a80SGe<;%7trhUJcK1(Q{#dYl5wA#i*M-WwSnC%Z zd;zz6(1g`Cxb@3E1aibGYEF~j)1(ddY0^lPX7-iqMkg(I z$la-tV7K}}YC5iPZtE?ThbOVXu6z{+k@HTm-jb1)`K?wl+CO)whQFDkXJ`mXYVp{l z+e$HtKldq1ta3S;(#&?TT>+5xrzx0DW=nKAv3C6RV@7}0F+CF8L2=ttjcCHnO zbDD7u8t3Yiz8k}{ndLn&M82CaRR+O+j$_#rch5u+B~3sAnSkt4J7iLBd-N2^np=Im z-Ee#d$u2d`iY#*}uaJDkr=bYHaaXSTo$#5q+}M!`NSB~ien(hX@M(r!5O6tK^#P)!glfT;G*tDU^i6NY6xP@riMU=19)U=7S#WvTWkRE17reFRs-@ygiiW;>go zn(9zrTuFJQhKAhMfB6d)dXQ3v3>U7VpZGidjYUEXBk%#=eq`>!PYjptkUPAbPG}JC zz|VhA`y9Qu{YeeHh?JZ=tVahEsM&{`mmpY#05MTX7CZhgvzEMt&LfFHZ6D$vw88uC zDD9Mjm@KU34NDXVoqaLYs6ztcK9c6{c)kQjqMYB-LBU=C*g;yjiYddFm@GQ*Sa^~5 zBwP_Vl&{&Orf(rVH~u>9a4HuMphEy9gQIrlN#ovwK5alCR#$$VEG+0+R`Qq(-D`>uU_`NGS*OBYPqNxu{G^g|Cu z_)8~^Df6T7VI%$z5?FnLL}^_Re$*~y*9Eh%e!MWrN{q;t)+#5kG&{6tk%O>KhKhm3Np|{SXf_rz_JQ>v_5f=X9*rhtrgaZNJZxyy8O{ z6r;;)Gr&tUdf#A$?T47Em$r)ca+Clb0F=eJWARUdhts!xjbIN8z^VOvG#2QWvIFGz zEgylu*VU+Q4fy&6NSVHRBBz>pbf@bYs+iI;jxHJs}?j^&EZth}RhLmjKbGrrheySF&$trX85%E~1K3EBcX) zCmct|YT9`p)QM4;yAB_2JTCBZBrnF-a>liJ<7_Fnnp7L5i}Anm&h7Xg`3OC{)*Its zed1D0Fi!ks)CvV=HD8}-$}c#7mvIAlN@9iQUX~Ns!xw^)GtQIJt&U~;OW$3sr&v?T zJis^?A&AZQl^Fi|XPg{Y&Ut}-@0lY1S*&nv&5O9zx(YxIGlN`xv!(vv_k>fW0=XXj zORr?Y8A82j%}6aC6I&d>!>0elH}vFS#zVjA!8xZ6;77j-?#J#s0-@Nu?EttkdqN^& zwxYkIM9`JYJv2I(g(SXw+lDXAHj5xDX$Y!-7X%53E=7Gh8mo#_j4o5V`pI6?=)>k}(%FOssrh+{wVhlaKI3X)@M>n% zQT+~By~~j`B3}3ME4KwAdf~(yxQM+sN`s0yA_>L@&SDpt)7N-5#yBq!?Y;r$G=f$ooprbK`VN*Ho z=dam}RSF-kVzNu>y9#g8o3Q#k&3m1kf+f)^M)YlzM8rn=$2B`Hj0eVyzhqZVhPB zf3g*P%2BsU<`ZJ81L0(#0Vwr(%VO;$*LPI+yktE9%w<0A$q@f?yuvh2%}Yd2U_g{$ z%yN(VM^mU-T~0yG9OYQ|z%+LPn@Zp?@EbAd54^WRG35_5K`V^F3;GeAe06HhA=87g z_*;b`l*Vr&pGB}D>&5XallDO?-{xBQ!C2+n`8*nfXahowY6uxAvRrSEj@x{jM(l;Ges5ffo!Byk1@`WG2 ztxUiQk>hZ>PnkA#&yRn2OtefD!%dQ`*;ZMWo);K=iM-+{=F!w2fSR%+D2(_Xqixp1 z7%7Yg)<*GkOiQZunt(Ns-A!Dt9=9Ai3rgJ3@~YJ|4Tj3noJHb%F|tx+oHu(sSyqjV z9uZGekhk;48-8WJC&}Ak?~b^F$vmzA`q|iGbpXCq%etqN-MP zs@@bh3r^WvEjQo&5=K>i!?5jz3Y*9nep7y_3$XI$hwN%|fx;hGj1yxw5>kD(beoSF z>2xWfUf@-huiUkvBpBdRoL1%Lp5AH{`Ws?vciJYR;_sIPBuzf|LkdPGQJ<20w1rCl zQx3pBb)H=QJyy(@FtL^D#-2?;_V0YT^Z*lgGu*%SK@B++WLaRKaYP?v%~4F&lYQ{& zrLYWWE92itnTPmSHG+|=Q}fOJOZV&QuM!Mcb^^MSUOR*{be0_`ZdQVl3O{ARp*=5z zQ7*WJL_th(uT7D8iFQW$f;B-RxPvm1<{x8J@yB(!vum90vMP|A#?Q-(AD#=6Pi?BP zqhZ@y|dle4`(rl1Z${P0b86v7EH_>+TnWFO-NV zQRk6{x0^klDcf;A;2A?Azci$!2v>^`{3&<5{bTZT>cy-pVAkWS6?}jmd&1!2a~SpoY$_m*wa4_oj$M}`#_T&~kHgRgCeK@ERYtPiS;@Q;#b*UkV;b~O zk3sUI-Ld9`R)yxF*Pxf+&ed5%>N$7ogLCWpgXDOm8}FrA#%{!tB%ag$=k|>nN<#I= zJt6@++dy|;Z9o>_5{U+%$RM-mSIBkqHKaM;=Vre$tZu)^0st;hO^0}6k;BMLJ2N$0 zxl`JbIHGg8;l^H;8!Of<%BpZ=?4c8L{vJUtA?YY2U>4(Sg3@6Rsu>7pAZyLjej6&{MfvIUOAU%c<(|M$p|h4wrys!`C3p?~%&G38?q(@VI4et+Zc! 
zMw<1{C5DRQF-j`uJp_X@tE4IM*nGbSY$QqTU`;H%^%AX;eAd#X%3^QN4#*ucn4F_DIaaa7KOGZ%m+V|$>Vo~d^nK9yNL`%!YY(d_=sLcR# zZ`jlsdfDT^*|Bz~z=>+-QlKBxA}z-608eix$sP}EdhA*p zCPm5eUHbIKQ~Z~w?4?8xTxgY&Sr){1l7bgNu5?i+90oyav- z!Ka$7VJ>tTUhO4;lwa8fFt)CANKhb$52ax3iPWSkJ8-GpM@eXL03cdIJJMs2j(@J4 zbQ&YEhzzCXIORCn#Ieo50N=;q&j|O$d@qc=r_9$$j&nXoZ z0jve#L6IoF(N!+G-39%)b#_AwS&;;ThP5^NGdT2G{!#y~Ud_Gi$6=K0yVg588bIM| z;QZ=U8)8YT=GEt9-}sE3$>-H0u5(jFeqSg|^dVvE&_W~Pe78rRf7|I|&7iV6oc#Wm z&$3f%g_JiSNaY~(HGDDbr_XGB-jKtx=-g;emzLaC*W2ZQxeTLfe@yAXwEJ*^{#5AI?Aq^1E}r3y($&nd8PdC0^{1A4!Ku0-rAQN zWZR9#l3nLQYPN-Gdvn3_#uPVrqk1NbQaI%Vw@hR77Y|2z9uTtF8zdOovFC2u(6QQ4$z;s z1akOut{*O>(VXUHh~7F$02xJWiRs5z?E;HY3FqH8XyWUJj8FLIe9XdGBaj`4kew7d z%W~hp6bzHcPrB1pHxoaD(&5%t?eOT1d99f-`LYw`;oq@?&~>*r8Bu$ADvm+fcpmTU z?Nd63d*{88jTS=3m}2|FKSEn4Cco2&u;n+eC2{XKBg9%SZ>gsNX(a_g-FaLT- zb?#?1zpn_OMu-QLD)NviMZ=@*XZGU&NOUfR6#-??@p>`(*GCg$^FMeH$`_MLzSGH% zOp*^NQ>o8}+*`IMKR>jnLJn8;x7ClU+o?SXpOf(;I#gaJKcjrE^|mS~HC663UCMYh z{(|d+L3>4VO!;HW0y4W}mugt?X2zd^I9G04TdvkqqQK@2PBSjPKQk=njEa0$2%DWN zFK+F8wb?9fnPiH|x+6y#t)wexCZ{V;SPZUnk!tdXoHob3{( z>sRiYW);xvm;4L*9R|PsxQdKgmtGf231U7=!G9xNf)Wz~7O`i^HAVqOOtyM`3_Uq{ zE5Q?1c~6G18)Q+2==t`J$Y-WHItKU;H~d=u?vgt}Cv3igBDG&p*NfAyop?_9W@`b7 z&CZMf8kq!WyYH9zx>BTZuu$)M!9(qfN(Mty>ujE)d zeCMxlw)3ow+J~ROACPlV5K>>bXUzEe<>03?H?5OVjG&0@l@SKSfhYzl?AFP}T+5*T z+!LfM&`Xnt;B_sp$crmC3)-)ofBs}}eo?XY1vFv^T5siIgti1M23^g$|Aau0WhlhV zvw>$h`o)6)rKSO(e%g6Q7k$8-#ZlKKu>nG5;OyRH9lW#CNlgYd)S&)Pg)NSIxtar6 zy`jpj59AH%73+xY-^I75d!OPMFGj>mSQ;%ZMda$4IYng4ddq6&N5H^5T|HOIRXg4m z&+v6~+{p$z+XL-!wt2vGde!L(M{ZPU>x5j4(YfzGY@Lvm{sJ9l?@e@zZwwdx zBqv!2O~UvtE|XW<=?;+mvR=fuPfNF2bZB>ifG!ih&Y5zVm|q{kt!5S$D#CCZe}(mSm#F zc`pHSF#59&ZU{fUS8`*R;<@|pxNqfNd{%Sl zO1aht_S0#LXkeCGz#dw(P?-ta4I=Tx_V6VcG8XN9$9CBayqbJ6S+KvNzhZzaV85sO z=jFd!Xj#yArKP`1HjO4U6}P%av9-ARM-^{BWwA^4!O6EO-<&AcNdDK-sdO{+x*G4@ z|Ngz~u0VuU1xkKY&l$dBWxISxg`R2=qVEHanWY(`H2=^WSsHoJb$ zQcs$0g3leB7fV-drV96P_-OwHNgw!&$L{1RV7EByHb^P-eL4WMn_)T-2d<1Z6(p^g z3?^CeT&*js6cc?=yW3es_$VM2<^ENGHuu#W5tb_tD)H0jzA5W++j7x%GX&5f+ zP7;tRkL!_RQ+a`-4Ah^6vpJ?sB+f9aob%B4>4THn(;qlpsxGKHdm;@zmwzWc{Bv+y zQ2un6kEE!Hw!nPlP0YCseeKw6yzzCoBlhzz`H|WM zOYkZu*y3yR^6c=b{S{5e#m6%=R3Il5EIn)*SCN=ahdIquhoEE&1pd?s?|J9p4~mBI z2TgsTJVMB2aJV3 zG*taJ+Xpn4u>aI#adU{U|;v?W7vl2F}*!asQjzoq>h6jo3@j zF#cY6ee_b@+Tiydxfr{;o+tlCfR<3X&DGr90Uj;?z2KMb-Jla=mqMBC%(w5O=Zakf zW`j98%|jiKbg59y3gbm|#17hPa)A4kj)U%npU1Z9_4C&$z!sapw>15xYELR^+crUd z>i9kQR;+E8l@-yN!Ki2tN#6(b=ImJezg;aouL_#2kGXK(tV^F)7%vq8JJc!Zl$fYdQ~lQqLDh#yjnp2Z2P*0q&+Y8V=5E zS#BP0d#k0#uk3=uz0ilC`mW{XoD*!Fk)m%NBh;G;xQ5ktxV>6nU~jy)dt2S}ckC}X z59m1ctJ6GFlw9RejPhay?*Au6z<`F}=qFKKDYse1F#%Pw{#8J}>FYF$U0g|@56a@T z4mHi7-`0SWQ{+%RHgyZCEfuEwBp9Dr{-4m-1SWchy|E+jLcx?%)25QUP;V>vcn`;9 zhPb@+13&4Uu$dUR=_fD}vFEq`RuLsEzx$>PU2%^J+LcmfTh)p44DreqwO}9Z&mVl^ z9D;kM4+VB*O_(3H^baa;I*X+GKBs|xW5MSOWf%tt*fv0|G`X z*u#FjzR;X5l?QGU14rvAgfu|{sTy-Yt%azgc5``(0Em%2D^Cd5^S4I}08c}*+SzdypP9fw&MbGE{RCtT!$KUr9_8j%A@;y6 z*Z3CUK5U==669n_Wf*sR)7V`4ybAY+1i0=S9O6BjDX++i+tCXvZC2%yUMRTf7jQi( zMLEz2bVY*GWPBOAgqsS=7kMWw`9TnsXZ5xWCc7{2oK7 z^WXmpzJ3nAu3b_#q+YX^qweXmq{}mx3wVt6aTGInvM(ExSN!_W3w%C6tf}~yjlHAr zy7}ug(N$NG zaK7!53+SG7FPuiGbFoLyfirq&d^8kByrBZR5*PKdB~CVu*Ulrdmes+b=evLj%j8f@ zb^HC`DWwcUc9t4RnAev};5}_*g0ths#@n0#!hmzv;Ked~uD7-j{?VAQA2*lP&j}AV z#Iwyk(W8Kl%_=1bE-y&+7OUcRbl~%>;Mg_pb&hvAqkpLtNP5{Xv`6F^0CFB1&_{S1 z?+~UxPLP>IA%=JxYO=$r{WwoLW`#xqmWFE$kmebN&0m~iJXPaZ$O3!<{!poW$g zI7{93(r*7s->I`9fP&`@_IHAicZ1$#Zvm7h41^H2UqDtCNE-{$|&4)9&n6VbH?L zRvxES5sr((bnD(D9X`MFw{>4zu0x0qZ^dYEEFf8T=f5mq`p{uc7a7f z&&EMrIrQ}^3-K;21lE~^9U1y-dyQm{vM^O5H)=Y=~cnF_yjxUpY*D0HC5 
z-&k~wXATNv=IxZt*5o)G1$MGw5jZ9Obau2D5>-i=uM6A;oP|h3GCeIM3AC3SHL$0o zA*1nA2Nv+lCA(rn4-3%r>E!%Q+9ytv<8*$tf4ZZNJ`!-K9M!tH_Yrf?2KjdexS+f0 z7I9Wa+Wl~#Xx)?5f{qU0WcU==x}?HG-}zDwu(YkOS9}Ty;s{^M1AUiMCr`^&I#oCT z*Jd9rdy?Msq#pjJyXiZwM+pjyXz}T3ZL+#rlgDx9=_b)*R4y8zT?gknlmyFO5C+FW!;S6 z*YYkwNyga&$wx({+4aZua;t}G{LmKaVlb2MxPwS)oulRJ)>oO5Zu;;KxnQu)v{6F6 zuNSfV!WYw|g=R$n%gAWY*BRK^9=}W`#O?0n(>fa!4n8Rg$EDA4ov;dvusHEtcaQCU@CggpxOjc=*dB)63uV)zP;zT?~FW zVTwPD2#r3#!q$aH7J#*8R%T1)0rt6n|1`(Jc1@|EdM*mq4kQhJ3mUyXUeMBX5_g~D zP|Lnj6xfO_Cmv)Rqv>b*Izke)cO zkWxWMl|0{Lr|}#P-jR1AIm0bQw1BlOryPd0F#h((zh>4vSKb3@z*8%;8(3?v>z6O7Mjr+oR!6O+0srA5TA_p-9Wsp+x0jFubY$luT}3`B=(R8l@sk#?xFr)t{u#0l6!r&M zq=3244_oXHju6&VJe>gM{V5GJYbx6Gm;?BaPmAQqYM!rGt$?U_!kzU00A1Im7?L%Z z^z6wviVXx1((%_^d+kcf>UvQ<94suOepnzWW^mICFmFh$3?Az*nsi9$&xw8yIH_Ag znQeE@OljQN%ea(>>WT2TA|3Ynd$9d~--BMoBfSsTKKRQTC7dx+^K2^KuEWECinPN8 zDa6{P)Ru}ptg7`{3m>1|XYCyN9NNf21udzVXCO`>i0Svm8v)PM6=$9+koJ$FPr|bb zo`7S0tpIi$v0#+l*1u?qwp|ejYGe+N&r>#3Z3+~FUT>7$=9KEu5IpPeAey!MrPy+^ zL9eG-h?eYar5k^vkJjWVIcw-*1cDT=(^~_^p*Cb}Utf=nCe)U^?qpDj`|A>vZ1_(4 zIS;9}rr%g0z{h{WwvsDzJmM`+?xhCmn8_(4nVlYHo=pYq`JCw2V26a`|3Cok=J|#3 z*?9R0iQ~+nO42`e#u~5`^Y`|tsg^OUbii?UqtS3X@azm0@aC7ETZG67h16C~n#%6K zi7YVu5UV~E{{*gb3-a-j9qJyrKR0UQqOW+A>)<)N zh+{Fh4An0UZJwnJ@Af=puUr2Dx_ehFKy}bb{5L&5m8M?qk@`KSs2KSs3aGwRwYp%e zQj`6p|5sX@KMx!9bw&L~!?D4)D?) zMVi|!hk=mwlSK@7AdTO_Gn=W{x9J@0Q*H~^D)TOC>I?Vup;1AXAVkv}@q!tT{jH^} zd4MLojmY|OUpptr&;kR;K>yvlTpFlqE{`DdSq$HlZjol175AcoDHg;I}xY_)Bi z`9_{u=9scl&oEqVxDw@+OJ^Z1w-u5efoU6fMiE#)pPuve>d=)+3xfM5U0sb1Yu$aA zu=eFZ&%)p3%pCIQT_P*&++7$fr08NHyhxLQso}M3sw$o2XK)+3(_@X=g1hV~7GAF3OJJLCF-vkg=XlZsTC2|rlQwlNx#C_~@#;Q$#;12(x z%y!+##rE@v{p7R4YwQti=B9<$oP%+>V$g05=X&h2VzT^EELMNs-h>vitb5QzK~EpK z7q_$6UmOG?%FpAy;l`O_(l_WI{aBWUjyoGu7TRaGEGv?7LIaGXl;x-FHXy|6a2)m&Y2TL&Wg!)Dau1{gu>ixU)vpS zNJ9py>A<)qylp?hcviXY2B-bz1M6}G>oNjpmgb_-Nw^@YCl0*R?q+ML3ctn&pd+{Z zp^t}KKjzN}310D9Z4}^=;fwLBo^d7nqy*KF{1Y`#8~-fRTi?V(Y%Y$Xp~}-D&viu+ z^a&nrlubwrP(fdxu@D27@_|R+oxZbIHt_Fa5u>^L6B^6W|=bFLb3*kEq32Ed=k z9>5d<4`Ed8nrp*OzoRhI`a+H>sjQ}G#7NF8Cu~DF*6~qYA+#mO1eiVBV1?&AwPo^q zQD#bJ1sBxtWbWN9pT{7Q^~;v=w7`35(p+;%<&AY1c{kt++(eZoojlR);!F^PX*e!7 z1^Etd+vq`-%3@6vOYovnpNux+15e{pJBJ_>PCk8+unSi4mqKv08i(m(tHsZ@uz=&~ z0KDd8189)EToEsuvbYvxH0}pf{@L$_t?qd4Op=e!M@p#YE?)?FIzRPzsyRVbVboE? 
zPYhW8U$=;X?uCF3D;10Twsni`-}|QCM`fIL;JyG_>yqSxlT5;by@#_o>OuTt8UqM% z4G_Y2U{z27zi)8INupa$LA;(PO0AG4dk&JUi8YwpZCxon!8!s^v)6!dZpFprroKV0`KYlo<)4xle3R!%J%%r9n?KGf!g}Dap%h ze_YeaVF-MW^i;ed?!UBgR#Ol4j;QAvrYUkf$#G;5; z{mYihs}=F!siU79r(O9YJ{c3{q-TJ%1Aw%SyDQOLL7dEH2K*z=lfK(|GdB^nj~N4L zmOKFB*mJXc18o6_uLOi}VCF9*l)oIRt&@ArsqRo^!Lf*7-`c;g_#uNu{|`$_-jINl z*c_~kqVG%)Kp`^jLeM+uPZs7MS}JL2Vk@S;dTu%v7R?_KLjMe6YdDVaN_^-os42hI zLn?FsGE#8BMFIJ&OdDSMl&bgM`@^7viADV!k>fBJ=TkGn7wBnQJ&{NjDP{1klUg5+ z=l~aIt#{mj&bT(q{eZvNmz!Ekm@xll``SZG3K7K`OYz$U(eypP{E+VD{rdd`!0YLM z0&vOvA;s=8wO)=+q2BvSx^UCovk9-6!_G)9tS(bvee}NU9{QzLz#f*5v}-eNvaP2F z1#Wtz3Q+VOi0Eaimz&<2RFJDbP_0<^Ykm}hi_f{#%bX=U6^j)^pAHw<)h!x<+;V;E zS;6IoYTxht`d0vg8V8JRJ!Z9#b(&tqwT>ThglUEJH}-|3CQI-2wF{!WxhbH-X@6C= zuNX#OgZa8xXY@v|rOq)Z^1}1@`C`oAP7G9*LuLJCJ6OCtDaWEfWIeT`#Opq2Ju3wW zfxQ3Wrq#Zmn`3O8fcUW(TRp;k}iJ=*7F5gSC# zZG_HL#VQl|h=lgjxC1C;_P9M+Qv;ypPmO&f# z=bf6PELElWctnD6^eD*uOUA<(NiTWVEr!%^nD<{7uBWLH0&;>gTlnkRT6PYRVPW&} zFof_29Drkb9zU}|&=WjK4ew`+7>)SBaZuagC?#uC&=x0Tny`(dqYS-q`pB5NeF}d= z`^Y7MY`OqquH5R+c7UsH9KPcJ`U`||&Urlw$1aY_kCK2j3XRGTXF=#(1q@Z$5v>-t zQ+vQ_M~7X+c9(jYfr5AUP7U6#%ced@tC-KSU)RaJ@l?VgT({^rO#b+?WaLdA9&hNm zBfFJjJ+574vfVnGGeBG^X9-10s}Uc)OQA2P}WuDLZV4gf#)Z}@3?&YVRxkblyH zE8wL)U;~`|n9Yxmsrf0>D**=d2o@Czvr=NR4C;)AC8!S!8@|-U9F5ehA6Z8M_!;1N zf!S;I$E__on|mN5qrreC+3Z{HA?nwi^QUHNU?{J1a6) zb09%kw-PrK!ZoeUI$8Y?w*Tdl6Vz$%vfzY2IB)zhcA_w7lwkZHXq5E4MTF`HsBLm| zraX^kd>o@~F+=ZG9*r{qsh-489Gl)z5sloB(-M3-O@0KFdl%SDzK}9ogS&Q4n0eV5 zkW<6V3_mlFImo0h9&zOB^=9@_dAjG<`M$(?jv%>67mz>p@f8Srd#Oh3Z$NV1!1PU2 zZ1^>sbPl~q&81x*O<{C3gX_gm43GMcn<+$j-cB_&mpir-*ki~B0`i*!P>%VNY*~RU z<`)wCfRC7}m)lt57&K87hNG^lzm%6B?G+!fD;t_>Kde=cwcQ@QAownaR0y=x*02R` zNMHZ9zp3xQIb*>-iSfPRih^I`r%YCzDl?RyL`iJg3vHeZcKqn;L4Y+9z}1aQqYZ6^ zpCeI*1j*pT;FX3r0q`;NoAk_ll*!8Wa5iL(0KkQWvCh6PHU{YF7)!v{05;O#D=uEK z6!L2uIitfmc}@ax{3?uCqyhqkh8;bSVaTY9HF+eU>+O+P%3)?CHyQWG{}5S<{C|EG zhf-Z>6~+X>Yn?5B|06&2y#U0=ZCE7%h zUlO^E^d5dEavH2gJpSA#KVs-$eUbDkYHxVh#>uD2y2QrsA&5S?}2<%OX zgJr^RNG(%xUsyY9A}#adlJP^EBb7|9>6=eveq&X{)AUZKiSx>Kp=k$iTTXvj zKVy9R0D_W&?$KJx6L1!;w)Q@s%XkpPumzG5YNC*}u!r;fF~E5!1Ki7FRqxOVcFCQ7 z7DQ>N+(-q7UZFRiaB+SEX16bWy%(b%OKLmJWL~1j*6Fa1iDAC)iG*?a>fUh8k^OrA zW;EB)A@OJ9%+p7mR3>4%2a85U+|&T)vBgo22C6T->6rbkf`$>R-+V_gJCC_eg_dL- zZgUYcR~nA$zp3xr#&)#cj;3SRoS;6ajO)R7l!o(s0x3E0>)#!*-$_6=fF*N$cjdrT zy#B0u%BX558o^k!qW~?b(c9l>$L95{lYzI7uH}v0_m1OP^&Q{e)`g&RDk)1SNi9<> zaqx=wHD-jDC){;;Ym~bY@P(gsB$rDtPUx@i?os!jtvUYWIM#MNTKmNtrCPII6)cFd z?T4VPr$EY*o@dJH0HEgJCc;!f0n^^Q6(?Yh=NU>3;VKNpCH}qjA>0l@I*)zZ=nb4; z&3##<>VSEvFg1XlLhsQA;&5LK-bcB4dYDAU?BAz1Y4P@-Q3JeO)=Bi6jC<7VUp?CF!>%pypUNIru9BX9uPLOt+79Vh9a;r}>rWQ;=-Nps-0`(w0 zEu9>>7Qg}?5SNfOh0jkPE$E!n&86NwPnxXod)+VVnCQ$P zXg1j`{c?lMvjWuEZA39Q?21VBqKgY(zmGmp`rJ|9Oxcfn{dGF9Qk^$l@Unh}@~}9P zoLQTn2FIg6z|*ckcf9V_cSz6VwR!-nP|eg4A`nj;(Kjp1qPb%gLvTn&D;h?6(a0xz z+1YdvJx@+6#bJw~Z3ZRIovoUnr<&;<2wGG!#q@-tY*gVfj^*3^8}f?&8U}f%#$4?h z&lo3|h;kppVMh(ijj4x~T4yq$H~>iRMgry;{udmc#4}+!y%5|lwX~Zim9g&%G0iHE z;wlMMydhFN^isS{J6{>;Q`^uBig>yG_Br2a4bC(G;ADvdMN0|jcDDUauP`PGmeUfg zWhq4GLU6E|P?0j%?T9Amay9qB>1*_fB4>jhX>DX_zIX*$-}$&!bc&x8tRMG#(YGfD z+{$;n`IiAmeC})T^Tor`*yuBHYWr)xw!R)q2WB(sXqN~afr(z3AI0BEzuld%U3E}N z=kKm?lR4CfG?!)c%%*NalR^Y9%asm0h>kqAV_nAe@34_0VI;z>$XW%M zmnxhOp{_+B2domF%lpb5T^@j?;nH|XIBEK4lTq*^fK#^C7fbKVmOKUT%fOu#piw{zsQ`yFhetB`?hpRHNl%ZYZ(o1b z)XFI&-)k&~@$%Z=z?*`}ZaO~zHzdb)s4bTWOxM?EC>r=yYt1!6NYkfrSW4a&#YpyWC2}r7`Ud z`*lT_XHi*-EGm;M2vY|73C^ej`sUdIY>{3r7b1tWt9&1SevnPxsXX^BkeB}=f9Be^ zW^$gqU?e3Faw_tq3>L>3{Fc{B7Ir6us~&Sa+p4(A1FC@V?0_YbttY(dJZwBB^lM7w 
z_Tfz`h-aqP5hdBU(ImL(4SH+(Voq4@%vq}Vodx54!!hfnXnKCx(akaxExfl>}8b6=<@99<6_3jaPfg8sPSpPS8ByapADk0M4 zq$ng!uPiB@o?hjKih)q}5Xo}YgskqqY$NNvvJr;?)j0p}-~UAvpr=k^lez literal 0 HcmV?d00001 diff --git a/README.md b/README.md index 87c3199b1..54fecb59d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +pygama logo + # pygama [![PyPI](https://img.shields.io/pypi/v/pygama?logo=pypi)](https://pypi.org/project/pygama/) diff --git a/docs/source/conf.py b/docs/source/conf.py index d267228d3..5accbd4a7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,6 +39,7 @@ "source_directory": "docs/source", } html_title = f"{project} {version}" +html_logo = "../../.github/logo.png" # sphinx-napoleon # enforce consistent usage of NumPy-style docstrings From fd04ff81bc752448d38e1d1061cebc78cbaee282 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 15 Jan 2024 11:41:48 +0100 Subject: [PATCH 084/191] Another round of updates to latest legend-pydataobj --- src/pygama/evt/build_tcm.py | 7 ++-- src/pygama/flow/data_loader.py | 14 +++---- src/pygama/hit/build_hit.py | 4 +- tests/evt/test_build_tcm.py | 8 ++-- tests/flow/test_data_loader.py | 4 +- tests/hit/test_build_hit.py | 72 ++++++++++++++++------------------ 6 files changed, 53 insertions(+), 56 deletions(-) diff --git a/src/pygama/evt/build_tcm.py b/src/pygama/evt/build_tcm.py index e821ee50f..05c7638c4 100644 --- a/src/pygama/evt/build_tcm.py +++ b/src/pygama/evt/build_tcm.py @@ -2,7 +2,8 @@ import re -import lgdo as lgdo +import lgdo +from lgdo import lh5 from . import tcm as ptcm @@ -57,7 +58,7 @@ def build_tcm( """ # hash_func: later can add list or dict or a function(str) --> int. - store = lgdo.LH5Store() + store = lh5.LH5Store() coin_data = [] array_ids = [] all_tables = [] @@ -65,7 +66,7 @@ def build_tcm( if isinstance(patterns, str): patterns = [patterns] for pattern in patterns: - tables = lgdo.ls(filename, lh5_group=pattern) + tables = lh5.ls(filename, lh5_group=pattern) for table in tables: all_tables.append(table) array_id = len(array_ids) diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py index a171ddfd3..7e5c38616 100644 --- a/src/pygama/flow/data_loader.py +++ b/src/pygama/flow/data_loader.py @@ -588,13 +588,13 @@ def build_entry_list( except KeyError: log.warning(f"Cannot find table {tcm_table_name} in file {tcm_path}") continue - # Have to do some hacky stuff until I get a get_dataframe() method + # Have to do some hacky stuff until I get a view_as("pd") method tcm_lgdo[self.tcms[tcm_level]["tcm_cols"]["child_idx"]] = Array( nda=explode_cl(tcm_lgdo["cumulative_length"].nda) ) tcm_lgdo.pop("cumulative_length") tcm_tb = Table(col_dict=tcm_lgdo) - f_entries = tcm_tb.get_dataframe() + f_entries = tcm_tb.view_as("pd") renaming = { self.tcms[tcm_level]["tcm_cols"]["child_idx"]: f"{child}_idx", self.tcms[tcm_level]["tcm_cols"]["parent_tb"]: f"{parent}_table", @@ -666,7 +666,7 @@ def build_entry_list( tb_table.join(tier_table) if tb_table is None: continue - tb_df = tb_table.get_dataframe() + tb_df = tb_table.view_as("pd") tb_df.query(cut, inplace=True) idx_match = f_entries.query(f"{level}_idx in {list(tb_df.index)}") if level == parent: @@ -878,7 +878,7 @@ def build_hit_entries( continue # convert to DataFrame and apply cuts - tb_df = tb_table.get_dataframe() + tb_df = tb_table.view_as("pd") tb_df.query(cut, inplace=True) tb_df[f"{low_level}_table"] = tb tb_df[f"{low_level}_idx"] = tb_df.index @@ -1144,7 +1144,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table): if self.output_format == 
"lgdo.Table": return f_table elif self.output_format == "pd.DataFrame": - return f_table.get_dataframe() + return f_table.view_as("pd") else: raise ValueError( f"'{self.output_format}' output format not supported" @@ -1255,7 +1255,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table): return load_out elif self.output_format == "pd.DataFrame": for file in load_out.keys(): - load_out[file] = load_out[file].get_dataframe() + load_out[file] = load_out[file].view_as("pd") return load_out else: raise ValueError( @@ -1336,7 +1336,7 @@ def load_evts( return load_out elif self.output_format == "pd.DataFrame": for file in load_out.keys(): - load_out[file] = load_out[file].get_dataframe() + load_out[file] = load_out[file].view_as("pd") return load_out else: raise ValueError( diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index 9e4c3b027..2b3e8ef5f 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -11,7 +11,7 @@ import lgdo import numpy as np -from lgdo import LH5Iterator, LH5Store, ls +from lgdo.lh5 import LH5Iterator, LH5Store, ls log = logging.getLogger(__name__) @@ -170,7 +170,7 @@ def build_hit( else: flag_dtype = np.uint64 - df_flags = outtbl_obj.get_dataframe(flags_list) + df_flags = outtbl_obj.view_as("pd", cols=flags_list) flag_values = df_flags.values.astype(flag_dtype) multiplier = 2 ** np.arange(n_flags, dtype=flag_values.dtype) diff --git a/tests/evt/test_build_tcm.py b/tests/evt/test_build_tcm.py index 49296e9fe..c0ba352e0 100644 --- a/tests/evt/test_build_tcm.py +++ b/tests/evt/test_build_tcm.py @@ -2,7 +2,7 @@ import lgdo import numpy as np -from lgdo import LH5Store +from lgdo import lh5 from pygama import evt @@ -11,8 +11,8 @@ def test_generate_tcm_cols(lgnd_test_data): f_raw = lgnd_test_data.get_path( "lh5/prod-ref-l200/generated/tier/raw/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_raw.lh5" ) - tables = lgdo.ls(f_raw) - store = LH5Store() + tables = lh5.ls(f_raw) + store = lh5.LH5Store() coin_data = [] for tbl in tables: ts, _ = store.read(f"{tbl}/raw/timestamp", f_raw) @@ -66,7 +66,7 @@ def test_build_tcm(lgnd_test_data, tmptestdir): wo_mode="of", ) assert os.path.exists(out_file) - store = LH5Store() + store = lh5.LH5Store() obj, n_rows = store.read("hardware_tcm", out_file) assert isinstance(obj, lgdo.Struct) assert list(obj.keys()) == ["cumulative_length", "array_id", "array_idx"] diff --git a/tests/flow/test_data_loader.py b/tests/flow/test_data_loader.py index 3c3b857f9..9c7763c16 100644 --- a/tests/flow/test_data_loader.py +++ b/tests/flow/test_data_loader.py @@ -153,13 +153,13 @@ def test_setter_overwrite(test_dl): test_dl.set_cuts({"hit": "trapEmax > 5000"}) test_dl.set_output(columns=["trapEmax"]) - data = test_dl.load().get_dataframe() + data = test_dl.load().view_as("pd") test_dl.set_files("timestamp == '20230318T012144Z'") test_dl.set_datastreams([1084803, 1121600], "ch") test_dl.set_cuts({"hit": "trapEmax > 0"}) - data2 = test_dl.load().get_dataframe() + data2 = test_dl.load().view_as("pd") assert 1084804 not in data2["hit_table"] assert len(pd.unique(data2["file"])) == 1 diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py index 5387b289f..a0d8542c3 100644 --- a/tests/hit/test_build_hit.py +++ b/tests/hit/test_build_hit.py @@ -1,10 +1,9 @@ import os from pathlib import Path -import lgdo.lh5 as store import numpy as np import pytest -from lgdo import LH5Store, ls +from lgdo import lh5 from pygama.hit import build_hit from pygama.hit.build_hit import _reorder_table_operations @@ 
-40,9 +39,9 @@ def test_basics(dsp_test_file, tmptestdir): ) assert os.path.exists(outfile) - assert ls(outfile, "/geds/") == ["geds/hit"] + assert lh5.ls(outfile, "/geds/") == ["geds/hit"] - store = LH5Store() + store = lh5.LH5Store() tbl, _ = store.read("geds/hit", outfile) assert tbl.calE.attrs == {"datatype": "array<1>{real}", "units": "keV"} @@ -79,7 +78,7 @@ def test_lh5_table_configs(dsp_test_file, tmptestdir): ) assert os.path.exists(outfile) - assert ls(outfile, "/geds/") == ["geds/hit"] + assert lh5.ls(outfile, "/geds/") == ["geds/hit"] lh5_tables_config = { "/geds/dsp": { @@ -102,7 +101,7 @@ def test_lh5_table_configs(dsp_test_file, tmptestdir): ) assert os.path.exists(outfile) - assert ls(outfile, "/geds/") == ["geds/hit"] + assert lh5.ls(outfile, "/geds/") == ["geds/hit"] def test_outputs_specification(dsp_test_file, tmptestdir): @@ -115,7 +114,7 @@ def test_outputs_specification(dsp_test_file, tmptestdir): wo_mode="overwrite", ) - store = LH5Store() + store = lh5.LH5Store() obj, _ = store.read("/geds/hit", outfile) assert sorted(obj.keys()) == ["A_max", "AoE", "calE"] @@ -130,7 +129,7 @@ def test_aggregation_outputs(dsp_test_file, tmptestdir): wo_mode="overwrite", ) - sto = LH5Store() + sto = lh5.LH5Store() obj, _ = sto.read("/geds/hit", outfile) assert list(obj.keys()) == [ "is_valid_rt", @@ -140,11 +139,7 @@ def test_aggregation_outputs(dsp_test_file, tmptestdir): "aggr2", ] - df = store.load_dfs( - outfile, - ["is_valid_rt", "is_valid_t0", "is_valid_tmax", "aggr1", "aggr2"], - "geds/hit/", - ) + df = sto.read("geds/hit", outfile)[0].view_as("pd") # aggr1 consists of 3 bits --> max number can be 7, aggr2 consists of 2 bits so max number can be 3 assert not (df["aggr1"] > 7).any() @@ -175,9 +170,9 @@ def test_build_hit_spms_basic(dsp_test_file_spm, tmptestdir): hit_config=f"{config_dir}/spms-hit-config.json", wo_mode="overwrite_file", ) - assert ls(out_file) == ["ch0", "ch1", "ch2"] - assert ls(out_file, "ch0/") == ["ch0/hit"] - assert ls(out_file, "ch0/hit/") == [ + assert lh5.ls(out_file) == ["ch0", "ch1", "ch2"] + assert lh5.ls(out_file, "ch0/") == ["ch0/hit"] + assert lh5.ls(out_file, "ch0/hit/") == [ "ch0/hit/energy_in_pe", "ch0/hit/quality_cut", "ch0/hit/trigger_pos", @@ -193,9 +188,9 @@ def test_build_hit_spms_multiconfig(dsp_test_file_spm, tmptestdir): lh5_tables_config=f"{config_dir}/spms-hit-multi-config.json", wo_mode="overwrite", ) - assert ls(out_file) == ["ch0", "ch1", "ch2"] - assert ls(out_file, "ch0/") == ["ch0/hit"] - assert ls(out_file, "ch0/hit/") == [ + assert lh5.ls(out_file) == ["ch0", "ch1", "ch2"] + assert lh5.ls(out_file, "ch0/") == ["ch0/hit"] + assert lh5.ls(out_file, "ch0/hit/") == [ "ch0/hit/energy_in_pe", "ch0/hit/quality_cut", "ch0/hit/trigger_pos", @@ -211,22 +206,23 @@ def test_build_hit_spms_calc(dsp_test_file_spm, tmptestdir): wo_mode="overwrite_file", lh5_tables_config=f"{config_dir}/spms-hit-a-config.json", ) - assert ls(out_file) == ["ch0", "ch1", "ch2"] - assert ls(out_file, "ch0/") == ["ch0/hit"] - assert ls(out_file, "ch0/hit/") == ["ch0/hit/energy_in_pe"] - - df0 = store.load_nda(out_file, ["energy_in_pe"], "ch0/hit/") - df1 = store.load_nda(out_file, ["energy_in_pe"], "ch1/hit/") - df2 = store.load_nda(out_file, ["energy_in_pe"], "ch2/hit/") - - assert len(df0["energy_in_pe"]) == 5 - assert len(df1["energy_in_pe"]) == 5 - assert len(df2["energy_in_pe"]) == 5 - - assert len(df0["energy_in_pe"][0]) == 20 - assert len(df1["energy_in_pe"][0]) == 20 - assert len(df2["energy_in_pe"][0]) == 20 - - assert np.nanmean(df0["energy_in_pe"]) == 
0 - assert np.nanmean(df1["energy_in_pe"]) == 1 - assert np.nanmean(df2["energy_in_pe"]) == 2 + assert lh5.ls(out_file) == ["ch0", "ch1", "ch2"] + assert lh5.ls(out_file, "ch0/") == ["ch0/hit"] + assert lh5.ls(out_file, "ch0/hit/") == ["ch0/hit/energy_in_pe"] + + store = lh5.LH5Store() + df0 = store.read("ch0/hit/energy_in_pe", out_file)[0].view_as("np") + df1 = store.read("ch1/hit/energy_in_pe", out_file)[0].view_as("np") + df2 = store.read("ch2/hit/energy_in_pe", out_file)[0].view_as("np") + + assert len(df0) == 5 + assert len(df1) == 5 + assert len(df2) == 5 + + assert len(df0[0]) == 20 + assert len(df1[0]) == 20 + assert len(df2[0]) == 20 + + assert np.nanmean(df0) == 0 + assert np.nanmean(df1) == 1 + assert np.nanmean(df2) == 2 From f1869329b7c51a185940cf8c8471248f783a9bf6 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 14:44:56 +0100 Subject: [PATCH 085/191] modification for dsp processing --- src/pygama/pargen/dplms_ge_dict.py | 37 +++++++++--------------- src/pygama/pargen/energy_optimisation.py | 10 +++---- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 0c1f9fcbc..6a155d239 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -8,10 +8,7 @@ import json import logging import os -import pathlib -import pickle import time -from collections import OrderedDict import matplotlib.pyplot as plt import numpy as np @@ -25,14 +22,9 @@ gauss_step_pdf, radford_pdf, ) -from pygama.pargen.cuts import find_pulser_properties, generate_cuts, get_cut_indexes +from pygama.pargen.cuts import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.energy_cal import hpge_find_E_peaks -from pygama.pargen.energy_optimisation import ( - event_selection, - fom_FWHM, - fom_FWHM_with_dt_corr_fit, -) +from pygama.pargen.energy_optimisation import fom_FWHM_with_dt_corr_fit log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -72,11 +64,12 @@ def dplms_ge_dict( Returns ------- - out_dict : dict + out_dict """ t0 = time.time() log.info(f"\nSelecting baselines") + dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path]) cut_dict = generate_cuts(dsp_fft, parameters=dplms_dict["bls_cut_pars"]) idxs = get_cut_indexes(dsp_fft, cut_dict) @@ -133,11 +126,7 @@ def dplms_ge_dict( # penalized coefficients dp_coeffs = dplms_dict["dp_coeffs"] - if lh5_path in dplms_dict["noisy_bl"]: - log.info("Setting explicit zero area condition") - za_coeff = dp_coeffs["za"] - else: - za_coeff = dplms_dict["dp_def"]["za"] + za_coeff = dplms_dict["dp_def"]["za"] dp_coeffs.pop("za") coeff_keys = [key for key in dp_coeffs.keys()] lists = [dp_coeffs[key] for key in dp_coeffs.keys()] @@ -177,9 +166,7 @@ def dplms_ge_dict( dplms_dict["length"], wsize, ) - par_dsp[lh5_path]["dplms"] = {} - par_dsp[lh5_path]["dplms"]["length"] = dplms_dict["length"] - par_dsp[lh5_path]["dplms"]["coefficients"] = x.tolist() + par_dsp[lh5_path]["dplms"] = {"length": dplms_dict["length"], "coefficients": x} log.info( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) @@ -402,11 +389,11 @@ def dplms_ge_dict( plot_dict["dplms"]["wf_sel"] = fig fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") - ax.plot(np.flip(x), "r-", label=f"filter") + ax.plot(x, "r-", label=f"filter") ax.axhline(0, color="black", linestyle=":") ax.legend(loc="upper right", title=f"{lh5_path}") axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33]) - 
axin.plot(np.flip(x), "r-") + axin.plot(x, "r-") axin.set_xlim( dplms_dict["length"] / 2 - dplms_dict["zoom"], dplms_dict["length"] / 2 + dplms_dict["zoom"], @@ -560,14 +547,18 @@ def filter_synthesis( fmat: np.array, length: int, size: int, + flip: bool = True, ) -> np.array: mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat flo = (size // 2) - (length // 2) fhi = (size // 2) + (length // 2) - x = np.linalg.solve(mat, ref[flo:fhi]) + x = np.linalg.solve(mat, ref[flo:fhi]).astype(np.float32) y = convolve(ref, np.flip(x), mode="valid") maxy = np.max(y) x /= maxy y /= maxy refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)] - return x, y, refy + if flip: + return np.flip(x), y, refy + else: + return x, y, refy diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 50d4ddfe5..cbf071ab5 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,12 +13,12 @@ import sys from collections import namedtuple -import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util +from lgdo import Array, Table, WaveformTable, lh5 from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize @@ -892,14 +892,14 @@ def get_wf_indexes(sorted_indexs, n_events): def index_data(data, indexes, wf_field="waveform"): - new_baselines = lh5.Array(data["baseline"].nda[indexes]) + new_baselines = Array(data["baseline"].nda[indexes]) new_waveform_values = data[wf_field]["values"].nda[indexes] new_waveform_dts = data[wf_field]["dt"].nda[indexes] new_waveform_t0 = data[wf_field]["t0"].nda[indexes] - new_waveform = lh5.WaveformTable( + new_waveform = WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) + new_data = Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) return new_data @@ -1070,7 +1070,7 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.array(out_events).flatten()) + out_events = np.unique(np.concatenate(out_events)) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) return out_events, idx_list From bffd00f6854aeca80817edcefd40352ec8b218cb Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 15:24:55 +0100 Subject: [PATCH 086/191] revert modification on ene_opt --- src/pygama/pargen/energy_optimisation.py | 57 +++++++++++------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index cbf071ab5..1c34901d9 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,12 +13,12 @@ import sys from collections import namedtuple +import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util -from lgdo import Array, Table, WaveformTable, lh5 from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize @@ -857,7 
+857,6 @@ def fom_FWHM_fit(tb_in, kwarg_dict): csqr, n_sig, n_sig_err, - _, ) = get_peak_fwhm_with_dt_corr( Energies, alpha, dt, func, gof_func, peak=peak, kev_width=kev_width, kev=True ) @@ -892,14 +891,14 @@ def get_wf_indexes(sorted_indexs, n_events): def index_data(data, indexes, wf_field="waveform"): - new_baselines = Array(data["baseline"].nda[indexes]) + new_baselines = lh5.Array(data["baseline"].nda[indexes]) new_waveform_values = data[wf_field]["values"].nda[indexes] new_waveform_dts = data[wf_field]["dt"].nda[indexes] new_waveform_t0 = data[wf_field]["t0"].nda[indexes] - new_waveform = WaveformTable( + new_waveform = lh5.WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) + new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) return new_data @@ -1070,20 +1069,17 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.concatenate(out_events)) + out_events = np.unique(np.array(out_events).flatten()) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) return out_events, idx_list -def fwhm_slope(x, m0, m1, m2=None): +def fwhm_slope(x, m0, m1, m2): """ Fit the energy resolution curve """ - if m2 is None: - return np.sqrt(m0 + m1 * x) - else: - return np.sqrt(m0 + m1 * x + m2 * (x**2)) + return np.sqrt(m0 + m1 * x + m2 * (x**2)) def interpolate_energy(peak_energies, points, err_points, energy): @@ -1091,7 +1087,7 @@ def interpolate_energy(peak_energies, points, err_points, energy): if len(points[~nan_mask]) < 3: return np.nan, np.nan, np.nan else: - param_guess = [2, 0.001] + param_guess = [2, 0.001, 0.000001] # # param_bounds = (0, [10., 1. 
])# try: fit_pars, fit_covs = curve_fit( @@ -1141,11 +1137,6 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_0_est"].nda, dtype="float64") elif ctc_parameter == "rt": dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_01"].nda, dtype="float64") - - if idxs is not None: - Energies = Energies[idxs] - dt = dt[idxs] - if np.isnan(Energies).any() or np.isnan(dt).any(): if np.isnan(Energies).any(): log.debug(f"nan energy values for peak {peak}") @@ -1160,6 +1151,10 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): "n_sig_err": np.nan, } + if idxs is not None: + Energies = Energies[idxs] + dt = dt[idxs] + # Return fwhm of optimal alpha in kev with error try: ( @@ -1212,37 +1207,39 @@ def single_peak_fom(data, kwarg_dict): return out_dict -def new_fom(data, kwarg_dict, alpha=None): +def new_fom(data, kwarg_dict): peaks = kwarg_dict["peaks_keV"] idx_list = kwarg_dict["idx_list"] ctc_param = kwarg_dict["ctc_param"] peak_dicts = kwarg_dict["peak_dicts"] - if alpha is None: - out_dict = fom_FWHM_with_dt_corr_fit( - data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 - ) - alpha = out_dict["alpha"] - + out_dict = fom_FWHM_with_dt_corr_fit( + data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 + ) + alpha = out_dict["alpha"] log.info(alpha) fwhms = [] fwhm_errs = [] n_sig = [] n_sig_err = [] - chisquares = [] - for i, peak in enumerate(peaks): + for i, peak in enumerate(peaks[:-1]): out_peak_dict = fom_FWHM( data, peak_dicts[i], ctc_param, alpha, idxs=idx_list[i], display=0 ) + # n_sig_minimum = peak_dicts[i]["n_sig_minimum"] + # if peak_dict["n_sig"] Date: Tue, 16 Jan 2024 17:15:59 +0100 Subject: [PATCH 087/191] change load data --- src/pygama/pargen/utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index e58785e4e..5c8f8c101 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -3,10 +3,10 @@ import logging from types import FunctionType -import lgdo.lh5_store as lh5 import numpy as np import pandas as pd from iminuit import Minuit, cost, util +from lgdo import Table, lh5 log = logging.getLogger(__name__) @@ -70,15 +70,20 @@ def load_data( masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read(lh5_path, tfiles)[0] + file_df = pd.DataFrame(columns=params) if tstamp in cal_dict: - file_df = table.eval(cal_dict[tstamp]).get_dataframe() + cal_dict_ts = cal_dict[tstamp] else: - file_df = table.eval(cal_dict).get_dataframe() + cal_dict_ts = cal_dict + for param in params: + if param in cal_dict_ts: + expression = cal_dict_ts[param]["expression"] + parameters = cal_dict_ts[param].get("parameters", None) + file_df[param] = table.eval(expression, parameters) + else: + file_df[param] = table[param] file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("run_timestamp") - for param in params: - if param not in file_df: - file_df[param] = lh5.load_nda(tfiles, [param], lh5_path)[param] if threshold is not None: mask = file_df[cal_energy_param] > threshold file_df.drop(np.where(~mask)[0], inplace=True) @@ -96,10 +101,14 @@ def load_data( params = get_params(keys + list(cal_dict.keys()), params) table = sto.read(lh5_path, files)[0] - df = table.eval(cal_dict).get_dataframe() + df = pd.DataFrame(columns=params) for param in params: - if param not in df: - df[param] = lh5.load_nda(files, [param], 
lh5_path)[param] + if param in cal_dict: + expression = cal_dict[param]["expression"] + parameters = cal_dict[param].get("parameters", None) + df[param] = table.eval(expression, parameters) + else: + df[param] = table[param] if threshold is not None: masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) From cb2c4d8cea4b961c12755f9aa198785b4f6aa2e7 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 12:17:09 +0100 Subject: [PATCH 088/191] error parameter deprecation in pandas 2.2 to_numeric function --- src/pygama/flow/file_db.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 4047f8c97..66545c419 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -272,7 +272,10 @@ def scan_files(self, dirs: list[str] = None) -> None: # convert cols to numeric dtypes where possible for col in self.df.columns: - self.df[col] = pd.to_numeric(self.df[col], errors="ignore") + try: + self.df[col] = pd.to_numeric(self.df[col]) + except ValueError: + continue # sort rows according to timestamps utils.inplace_sort(self.df, self.sortby) @@ -669,7 +672,10 @@ def scan_daq_files(self, daq_dir: str, daq_template: str) -> None: # convert cols to numeric dtypes where possible for col in self.df.columns: - self.df[col] = pd.to_numeric(self.df[col], errors="ignore") + try: + self.df[col] = pd.to_numeric(self.df[col]) + except ValueError: + continue def get_table_name(self, tier: str, tb: str) -> str: """Get the table name for a tier given its table identifier. From bc183a50acbbdb4a72a96968aa3a8e4897221be2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 12:18:30 +0100 Subject: [PATCH 089/191] add pyarrow dependency required by pandas 2.2 --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 6582215a1..74c036924 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,6 +42,7 @@ install_requires = numpy>=1.21 pandas>=1.4.4 pint + pyarrow scikit-learn scipy>=1.0.1 tables From a4ea61aee9a5acb361364ef99c22d45dcf7b94bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 28 Sep 2023 19:03:02 +0200 Subject: [PATCH 090/191] first idea for evt tier --- src/pygama/evt/build_evt.py | 309 ++++++++++++++++++++++++++++++++++ src/pygama/evt/modules/spm.py | 44 +++++ 2 files changed, 353 insertions(+) create mode 100644 src/pygama/evt/build_evt.py create mode 100644 src/pygama/evt/modules/spm.py diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py new file mode 100644 index 000000000..5765e1372 --- /dev/null +++ b/src/pygama/evt/build_evt.py @@ -0,0 +1,309 @@ +""" +This module implements routines to build the evt tier. + +TODO: +- make me faster! 
Currently 37.70 ms/evt
+- write tests
+- get feedback
+- write everything smart
+"""
+from __future__ import annotations
+from importlib import import_module
+import itertools
+import json
+from legendmeta import LegendMetadata
+import logging
+import numpy as np
+import pygama.lgdo.lh5_store as store
+from pygama.lgdo import Array
+import re
+import os
+
+log = logging.getLogger(__name__)
+
+def num_and_pars(value: str,par_dic: dict):
+    # function tries to convert a string to an int, float, bool
+    # or returns the value if value is a key in par_dic
+    if value in par_dic.keys(): return par_dic[value]
+    try:
+        value = int(value)
+    except ValueError:
+        try:
+            value = float(value)
+        except ValueError:
+            try:
+                value = bool(value)
+            except ValueError:
+                pass
+    return value
+
+def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, getch: bool = False) -> np.ndarray:
+    """
+    Evaluates the expression defined by the user across all channels according to the mode
+    Parameters
+    ----------
+    f_evt
+        Path to event tier file
+    f_hit
+        Path to hit tier file
+    f_dsp
+        Path to dsp tier file
+    chns
+        List of channel names across which expression gets evaluated (form: "ch<rawid>")
+    mode
+        The mode determines how the event entry is calculated across channels. Options are:
+        - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fulfills the condition, nan is returned for this event.
+        - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fulfills the condition, nan is returned for this event.
+        - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fulfills the condition, zero is returned for this event. Booleans are treated as integers 0/1.
+        - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0.
+        - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0.
+        - ch_field: A previously generated channel_id field (i.e. from the get_ch flage) can be given here, and the value of this specific channel is used.
+        - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but the result is saved for each channel. The field name gets the channel id as suffix.
+    expr
+        The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy"). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operaions order matters), or from the "parameters" field can be used.
+    para
+        Dictionary of parameters defined in the "parameters" field in the configuration JSON file.
+    getch
+        Only affects "first", "last" modes. In that case the rawid of the channel providing the resulting value is returned as well.
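+
+    Example
+    -------
+    An illustrative direct call (a sketch; the file names and the channel id
+    are placeholders, not fixed by this module):
+
+    .. code-block::python
+
+        energy, energy_id = evaluate_expression(
+            "evt.lh5", "hit.lh5", "dsp.lh5",
+            chns=["ch1104000"],
+            mode="first>25",
+            expr="cuspEmax_ctc_cal",
+            getch=True,
+        )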
+    """
+    #define dimension of output array
+    out = np.zeros(store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp))
+    out[:] = defv
+    out_chs = np.zeros(len(out),dtype=int)
+
+    if mode == "func":
+        exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr)
+        var = {}
+        if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl])
+        if para: var = var | para
+
+        # evaluate expression
+        func, params = expr.split('(')
+        params = params[:-1].split(',')
+        params = [f_hit,f_dsp,chns]+[num_and_pars(e,var) for e in params]
+
+        # load function dynamically
+        p,m = func.rsplit('.',1)
+        mod = import_module(p)
+        met = getattr(mod,m)
+
+        return met(*params)
+
+    else:
+        for ch in chns:
+            #find all potential variables
+            exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr)
+
+            # find fields in either dsp, hit, evt or parameters, prepare evaluation
+            evt_dic = {}
+            if os.path.exists(f_evt): evt_dic = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl])
+            hit_dic = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/")
+            dsp_dic = store.load_nda(f_dsp,[e.split('/')[-1] for e in store.ls(f_dsp,ch+"/dsp/") if e.split('/')[-1] in exprl],ch+"/dsp/")
+
+            var= hit_dic | dsp_dic | evt_dic
+            if para: var = var | para
+
+            # evaluate expression
+            res = eval(expr,var)
+            if not isinstance(res, np.ndarray):
+                res = np.full(len(out),res,dtype=type(res))
+
+            # append to out according to mode
+            ops = re.findall(r'([<>]=?|==)', mode)
+            if len(ops)>0:
+                op = ops[0]
+                lim = float(mode.split(op)[-1])
+                limarr = eval("res"+op+"lim",{"res":res,"lim":lim})
+            else:
+
+                limarr = np.ones(len(res)).astype(bool)
+            if "first" in mode:
+                outt = np.zeros(len(out))
+                outt[:] = np.inf
+                t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"]
+                out = np.where((t0<outt) & (limarr),res,out)
+                out_chs = np.where((t0<outt) & (limarr),int(ch[2:]),out_chs)
+                outt = np.where((t0<outt) & (limarr),t0,outt)
+            elif "last" in mode:
+                t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"]
+                out = np.where((t0>outt) & (limarr),res,out)
+                out_chs = np.where((t0>outt) & (limarr),int(ch[2:]),out_chs)
+                outt = np.where((t0>outt) & (limarr),t0,outt)
+            elif "tot" in mode:
+                if ch == chns[0]: out[:] = 0
+                if res.dtype == bool: res = res.astype(int)
+                out += np.where(limarr,res,out)
+            elif mode == "any":
+                if ch == chns[0]:
+                    out = out.astype(bool)
+                if res.dtype != bool: res = res.astype(bool)
+                out = out | res
+            elif mode == "all":
+                if ch == chns[0]:
+                    out = out.astype(bool)
+                if res.dtype != bool: res = res.astype(bool)
+                out = out & res
+            elif mode in store.ls(f_evt):
+                ch_comp = store.load_nda(f_evt,[mode])[mode]
+                out = np.where(int(ch[2:]) == ch_comp,res,out)
+            else:
+                raise ValueError(mode + " not a valid mode")
+
+    if getch: return out, out_chs
+    else: return out
+
+def build_evt(
+    f_dsp: str,
+    f_hit: str,
+    f_evt: str,
+    meta_path: str = None,
+    evt_config: str | dict = None,
+    wo_mode: str = "write_safe"
+) -> None:
+    """
+    Transform data from the hit and dsp levels, which are channel sorted,
+    to an event sorted data format
+
+    Parameters
+    ----------
+    f_dsp
+        input LH5 file of the dsp level
+    f_hit
+        input LH5 file of the hit level
+    f_evt
+        name of the output file
+    evt_config
+        dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) seperated by underscores (e.g. "meta_geds_on") in the "channels" dictonary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the suffix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defines the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in the expression. For example:
+
+    .. code-block::json
+
+    {
+        "channels": {
+            "geds_on": "meta_geds_on",
+            "geds_no_psd": "meta_geds_no_psd",
+            "geds_ac": "meta_geds_ac",
+            "spms_on": "meta_spms_on",
+            "pulser": "PULS01",
+            "baseline": "BSLN01",
+            "muon": "MUON01",
+            "ts_master":"S060"
+        },
+        "operations": {
+            "energy":{
+                "channels": ["geds_on","geds_no_psd","geds_ac"],
+                "mode": "first>25",
+                "get_ch": true,
+                "expression": "cuspEmax_ctc_cal"
+            },
+            "aoe":{
+                "channels": ["geds_on"],
+                "mode": "energy_id",
+                "expression": "AoE_Classifier"
+            },
+            "is_muon_tagged":{
+                "channels": "muon",
+                "mode": "any",
+                "expression": "wf_max>a",
+                "parameters": {"a":15100}
+            },
+            "multiplicity":{
+                "channels": ["geds_on","geds_no_psd","geds_ac"],
+                "mode": "tot",
+                "expression": "cuspEmax_ctc_cal > a",
+                "parameters": {"a":25}
+            },
+            "lar_energy":{
+                "channels": "spms_on",
+                "mode": "func",
+                "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)"
+            }
+        }
+    }
+    """
+    lstore = store.LH5Store()
+    tbl_cfg = evt_config
+    if isinstance(tbl_cfg,str):
+        with open(tbl_cfg) as f:
+            tbl_cfg = json.load(f)
+
+    # create channel list according to config
+    # This can be either read from the meta data
+    # or a list of channel names
+    log.debug("Creating channel dictionary")
+    if meta_path: lmeta = LegendMetadata(path=meta_path)
+    else: lmeta = LegendMetadata()
+    chmap = lmeta.channelmap(re.search("\d{8}T\d{6}Z",f_dsp).group(0))
+    chns = {}
+    for k, v in tbl_cfg['channels'].items():
+        if isinstance(v,str):
+            if "meta" in v:
+                m,sys,usa = v.split("_",2)
+                tmp = [f"ch{e}" for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]['system'] == sys]
+                chns[k] = [e for e in tmp if chmap.map("daq.rawid")[int(e[2:])]['analysis']['usability'] == usa]
+            else:
+                chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"]
+        elif isinstance(v,list):
+            chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v]
+
+
+    # do operations
+    first_iter = True
+    log.info(f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}")
+    for k, v in tbl_cfg['operations'].items():
+        log.debug("Processing field" + k)
+
+        # if channels not defined in operation, it can only be an operation on the evt level.
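+        # e.g. a purely evt-level field such as {"expression": "energy > a",
+        # "parameters": {"a": 25}} (names as in the docstring example above);
+        # it is evaluated on previously written evt fields only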
+        if 'channels' not in v.keys():
+            exprl = re.findall(r"[a-zA-Z_$][\w$]*",v["expression"])
+            var = {}
+            if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl])
+            if "parameters" in v.keys(): var = var | v['parameters']
+            res = Array(eval(v["expression"],var))
+            lstore.write_object(
+                obj=res,
+                name= k,
+                lh5_file=f_evt,
+                wo_mode=wo_mode #if first_iter else "append"
+            )
+            continue
+
+        if isinstance(v['channels'],str): chns_e = chns[v['channels']]
+        elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']]))
+
+        pars = None
+        defaultv = np.nan
+        if "parameters" in v.keys(): pars = v['parameters']
+        if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial']
+
+        if "get_ch" in v.keys():
+            if "first" in v['mode'] or "last" in v['mode']:
+                res, chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv, v["get_ch"])
+                lstore.write_object(
+                    obj=Array(res),
+                    name= k,
+                    lh5_file=f_evt,
+                    wo_mode=wo_mode #if first_iter else "append"
+                )
+                lstore.write_object(
+                    obj=Array(chs),
+                    name= k+"_id",
+                    lh5_file=f_evt,
+                    wo_mode=wo_mode #if first_iter else "append"
+                )
+
+            else:
+                raise ValueError("get_ch can be only applied to first and last modes")
+
+        else:
+            res = Array(evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv))
+
+            lstore.write_object(
+                obj=res,
+                name= k,
+                lh5_file=f_evt,
+                wo_mode=wo_mode #if first_iter else "append"
+            )
+        if first_iter: first_iter = False
+
+    log.info("Done")
\ No newline at end of file
diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py
new file mode 100644
index 000000000..4d4604452
--- /dev/null
+++ b/src/pygama/evt/modules/spm.py
@@ -0,0 +1,44 @@
+import numpy as np
+import pygama.lgdo.lh5_store as store
+
+"""
+Module for special event level routines for SiPMs
+
+functions must take as the first 3 args in order:
+- path to the hit file
+- path to the dsp file
+- list of channels processed
+additional parameters are free to the user and need to be defined in the JSON
+"""
+#get LAr energy per event over all channels
+def get_energy(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax):
+    trig = np.where(np.isnan(trgr),tdefault,trgr)
+    tmi = trig - tmin
+    tma = trig + tmax
+    sum = np.zeros(len(trig))
+    for ch in chs:
+        df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/")
+        mask = (df["trigger_pos"]<tma[:,None]/16) & (df["trigger_pos"]>tmi[:,None]/16) & (df["energy_in_pe"] > lim)
+        pes=df["energy_in_pe"]
+        pes= np.where(np.isnan(pes), 0, pes)
+        pes= np.where(mask,pes,0)
+        chsum= np.nansum(pes, axis=1)
+        sum = sum + chsum
+    return sum
+
+#get LAr majority per event over all channels
+def get_majority(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax):
+    trig = np.where(np.isnan(trgr),tdefault,trgr)
+    tmi = trig - tmin
+    tma = trig + tmax
+    maj = np.zeros(len(trig))
+    for ch in chs:
+        df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/")
+        mask = (df["trigger_pos"]<tma[:,None]/16) & (df["trigger_pos"]>tmi[:,None]/16) & (df["energy_in_pe"] > lim)
+        pes=df["energy_in_pe"]
+        pes= np.where(np.isnan(pes), 0, pes)
+        pes= np.where(mask,pes,0)
+        chsum= np.nansum(pes, axis=1)
+        chmaj = np.where(chsum>lim,1,0)
+        maj = maj + chmaj
+    return maj
\ No newline at end of file
From 1a966de2186ab238cea2873a25fe3d9ad72664fc Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Fri, 29 Sep 2023 14:05:36 +0200
Subject: [PATCH 091/191] 3x speedup and added LAr classifier module

---
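Note (illustrative sketch, not part of the original commit): the "first"
mode that this patch vectorizes keeps, per event, the value of the
earliest-triggering channel passing the condition. A minimal standalone
example with toy arrays and a "first>25"-style threshold:

    import numpy as np

    # two channels, four events: a value and a trigger time t0 per event
    res_ch = {"ch1": np.array([5.0, 30.0, 40.0, 1.0]),
              "ch2": np.array([50.0, 2.0, 35.0, 3.0])}
    t0_ch = {"ch1": np.array([10.0, 12.0, 9.0, 11.0]),
             "ch2": np.array([8.0, 14.0, 13.0, 10.0])}

    out = np.full(4, np.nan)   # default value per event
    outt = np.full(4, np.inf)  # earliest accepted trigger time so far
    for ch in ("ch1", "ch2"):
        res, t0 = res_ch[ch], t0_ch[ch]
        limarr = res > 25                # the "first>25" condition
        mask = (t0 < outt) & limarr
        out = np.where(mask, res, out)   # keep earlier-triggering value
        outt = np.where(mask, t0, outt)  # remember its trigger time
    # out is now [50., 30., 40., nan]
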
src/pygama/evt/build_evt.py | 133 +++++++++++++--------------------- src/pygama/evt/modules/spm.py | 41 ++++++++++- 2 files changed, 91 insertions(+), 83 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5765e1372..3340c58fc 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -37,7 +37,7 @@ def num_and_pars(value: str,par_dic: dict): pass return value -def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, getch: bool = False) -> np.ndarray: +def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, nrows: int = None) -> np.ndarray: """ Evaluates the expression defined by the user across all channels according to the mode Parameters @@ -67,59 +67,47 @@ def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. """ #define dimension of output array - out = np.zeros(store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp)) - out[:] = defv + n = nrows if nrows is not None else store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp) + out = np.full(n,defv,dtype=type(defv)) out_chs = np.zeros(len(out),dtype=int) + # find parameters in evt file or in parameters + exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) + var_ph = {} + if os.path.exists(f_evt):var_ph = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) + if para: var_ph = var_ph | para + if mode == "func": - exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) - var = {} - if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - if para: var = var | para - # evaluate expression func, params = expr.split('(') - params = params[:-1].split(',') - params = [f_hit,f_dsp,chns]+[num_and_pars(e,var) for e in params] + params = [f_hit,f_dsp,chns]+[num_and_pars(e,var_ph) for e in params[:-1].split(',')] # load function dynamically p,m = func.rsplit('.',1) - mod = import_module(p) - met = getattr(mod,m) - - return met(*params) + met = getattr(import_module(p),m) + out = met(*params) else: + # evaluate operator in mode + ops = re.findall(r'([<>]=?|==)', mode) + ch_comp = None + if os.path.exists(f_evt) and mode in store.ls(f_evt): + ch_comp = store.load_nda(f_evt,[mode])[mode] for ch in chns: - #find all potential variables - exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) - - # find fields in either dsp, hit, evt or parameters, prepare evaluation - evt_dic = {} - if os.path.exists(f_evt): evt_dic = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - hit_dic = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/") + # find fields in either dsp, hit + var = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/") dsp_dic = store.load_nda(f_dsp,[e.split('/')[-1] for e in store.ls(f_dsp,ch+"/dsp/") if e.split('/')[-1] in exprl],ch+"/dsp/") - - var= hit_dic | dsp_dic | evt_dic - if para: var = var | para - + var= var |dsp_dic | var_ph + # evaluate expression res = eval(expr,var) - if not isinstance(res, np.ndarray): - res = np.full(len(out),res,dtype=type(res)) + if not isinstance(res, np.ndarray): res = np.full(len(out),res,dtype=type(res)) # append to out according to 
mode - ops = re.findall(r'([<>]=?|==)', mode) - if len(ops)>0: - op = ops[0] - lim = float(mode.split(op)[-1]) - limarr = eval("res"+op+"lim",{"res":res,"lim":lim}) - else: - - limarr = np.ones(len(res)).astype(bool) + if len(ops)>0: limarr = eval("".join(["res",ops[0],"lim"]),{"res":res,"lim":float(mode.split(ops[0])[-1])}) + else: limarr = np.ones(len(out)).astype(bool) if "first" in mode: - outt = np.zeros(len(out)) - outt[:] = np.inf + outt = np.full(len(out),np.inf) t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"] out = np.where((t0outt) & (limarr),res,out) - out_chs = np.where((t0outt) & (limarr),int(ch[2:]),out_chs) outt = np.where((t0>outt) & (limarr),t0,outt) elif "tot" in mode: - if ch == chns[0]: out[:] = 0 if res.dtype == bool: res = res.astype(int) out += np.where(limarr,res,out) elif mode == "any": - if ch == chns[0]: - out = out.astype(bool) if res.dtype != bool: res = res.astype(bool) out = out | res elif mode == "all": - if ch == chns[0]: - out = out.astype(bool) if res.dtype != bool: res = res.astype(bool) out = out & res - elif mode in store.ls(f_evt): - ch_comp = store.load_nda(f_evt,[mode])[mode] + elif ch_comp is not None: out = np.where(int(ch[2:]) == ch_comp,res,out) else: raise ValueError(mode + " not a valid mode") - if getch: return out, out_chs - else: return out + return out, out_chs def build_evt( f_dsp: str, @@ -248,7 +229,7 @@ def build_evt( # do operations - first_iter = True + first_iter,nrows = True,None log.info(f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}") for k, v in tbl_cfg['operations'].items(): log.debug("Processing field" + k) @@ -266,44 +247,32 @@ def build_evt( lh5_file=f_evt, wo_mode=wo_mode #if first_iter else "append" ) - continue - - if isinstance(v['channels'],str): chns_e = chns[v['channels']] - elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) - - pars = None - defaultv = np.nan - if "parameters" in v.keys(): pars = v['parameters'] - if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] - - if "get_ch" in v.keys(): - if "first" in v['mode'] or "last" in v['mode']: - res, chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv, v["get_ch"]) - lstore.write_object( - obj=Array(res), - name= k, - lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" - ) - lstore.write_object( - obj=Array(chs), - name= k+"_id", - lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" - ) - - else: - raise ValueError("get_ch can be only applied to first and last modes") - else: - res = Array(evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv)) + if isinstance(v['channels'],str): chns_e = chns[v['channels']] + elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) + + pars, defaultv = None , np.nan + if "parameters" in v.keys(): pars = v['parameters'] + if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] + res,chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv,nrows) lstore.write_object( - obj=res, - name= k, - lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" + obj=Array(res), + name= k, + lh5_file=f_evt, + wo_mode=wo_mode ) + + # if get_ch true flag in a first/last mode operation also obtain channel field + if "get_ch" in v.keys() and 
("first" in v['mode'] or "last" in v['mode']) and v["get_ch"]:
+            lstore.write_object(
+                obj=Array(chs),
+                name= k+"_id",
+                lh5_file=f_evt,
+                wo_mode=wo_mode
+            )
+
+        if first_iter: first_iter = False
+        if not nrows: nrows = len(res)
 
     log.info("Done")
\ No newline at end of file
diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py
index 4d4604452..04956bec6 100644
--- a/src/pygama/evt/modules/spm.py
+++ b/src/pygama/evt/modules/spm.py
@@ -41,4 +41,43 @@ def get_majority(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax):
         chsum= np.nansum(pes, axis=1)
         chmaj = np.where(chsum>lim,1,0)
         maj = maj + chmaj
-    return maj
\ No newline at end of file
+    return maj
+
+def get_etc(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax,swin,trail):
+    predf = store.load_nda(f_hit, ["energy_in_pe",'timestamp'],chs[0]+"/hit/")
+
+    peshape = (predf["energy_in_pe"]).shape
+    # 1D = channel, 2D = event num, 3D = array per event
+    pes=np.zeros([len(chs),peshape[0],peshape[1]])
+    times = np.zeros([len(chs),peshape[0],peshape[1]])
+
+    tge = np.where(np.isnan(trgr),tdefault,trgr)
+    tmi = tge - tmin
+    tma = tge + tmax
+    for i in range(len(chs)):
+        df =store.load_nda(f_hit, ["energy_in_pe",'trigger_pos','timestamp'],chs[i]+"/hit/")
+        mask = (df["trigger_pos"]<tma[:,None]/16) & (df["trigger_pos"]>tmi[:,None]/16) & (df["energy_in_pe"] > lim)
+        pe=df["energy_in_pe"]
+        time = df["trigger_pos"]*16
+
+        pe= np.where(mask,pe,np.nan)
+        time= np.where(mask,time,np.nan)
+
+        pes[i] = pe
+        times[i] = time
+
+    outi = None
+    if trail >0:
+        t1d = np.nanmin(times,axis=(0,2))
+        if trail == 2: t1d[t1d>tge] = tge[t1d>tge]
+        tt = t1d[:,None]
+        outi = np.where(np.nansum(np.where((times >= tt),pes,0),axis=(0,2)) > 0,
+                np.nansum(np.where((times >= tt) & (times < tt+swin),pes,0),axis=(0,2))/np.nansum(np.where((times >= tt),pes,0),axis=(0,2)),
+                np.nansum(np.where((times >= tt),pes,0),axis=(0,2)))
+        return outi
+
+    else:
+        outi = np.where(np.nansum(pes,axis=(0,2)) > 0,
+                np.nansum(np.where((times >= tge[:,None]) & (times <= tge[:,None]+swin),pes,0),axis=(0,2))/np.nansum(np.where((times >= tge[:,None]),pes,0),axis=(0,2)),
+                np.nansum(pes,axis=(0,2)))
+        return outi
\ No newline at end of file
From 4d782302a718f3c04e9d644f3a41f7a58c441cbd Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Fri, 29 Sep 2023 17:28:37 +0200
Subject: [PATCH 092/191] small changes and additional lar modules

---
 src/pygama/evt/build_evt.py   | 384 +++++++++++++++++++++-------------
 src/pygama/evt/modules/spm.py | 225 ++++++++++++++------
 2 files changed, 401 insertions(+), 208 deletions(-)

diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py
index 3340c58fc..92af205ca 100644
--- a/src/pygama/evt/build_evt.py
+++ b/src/pygama/evt/build_evt.py
@@ -1,30 +1,29 @@
 """
 This module implements routines to build the evt tier.
-
-TODO:
-- make me faster! 
Currently 37.70 ms/evt -- write tests -- get feedback -- write everything smart """ from __future__ import annotations -from importlib import import_module + import itertools import json -from legendmeta import LegendMetadata import logging +import os +import re +from importlib import import_module + import numpy as np +from legendmeta import LegendMetadata + import pygama.lgdo.lh5_store as store from pygama.lgdo import Array -import re -import os log = logging.getLogger(__name__) -def num_and_pars(value: str,par_dic: dict): + +def num_and_pars(value: str, par_dic: dict): # function tries to convert a string to a int, float, bool # or returns the value if value is a key in par_dic - if value in par_dic.keys(): return par_dic[value] + if value in par_dic.keys(): + return par_dic[value] try: value = int(value) except ValueError: @@ -37,110 +36,160 @@ def num_and_pars(value: str,par_dic: dict): pass return value -def evaluate_expression(f_evt:str,f_hit:str, f_dsp: str, chns: list, mode: str, expr: str, para: dict = None, defv = np.nan, nrows: int = None) -> np.ndarray: - """ - Evaluates the expression defined by the user across all channels according to the mode - Parameters - ---------- - f_evt - Path to event tier file - f_hit - Path to hit tier file - f_dsp - Path to dsp tier file - chns - List of channel names across which expression gets evaluated (form: "ch") - mode - The mode determines how the event entry is calculated across channels. Options are: - - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. - - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. - - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fullfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. - - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. - - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - - ch_field: A previously generated channel_id field (i.e. from the get_ch flage) can be given here, and the value of this specific channels is used. - - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. - expr - The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operaions order matters), or from the "parameters" field can be used. - para - Dictionary of parameters defined in the "parameters" field in the configuration JSON file. - getch - Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. 
- """ - #define dimension of output array - n = nrows if nrows is not None else store.LH5Store().read_n_rows(chns[0]+"/dsp/",f_dsp) - out = np.full(n,defv,dtype=type(defv)) - out_chs = np.zeros(len(out),dtype=int) - - # find parameters in evt file or in parameters - exprl = re.findall(r"[a-zA-Z_$][\w$]*",expr) - var_ph = {} - if os.path.exists(f_evt):var_ph = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - if para: var_ph = var_ph | para - - if mode == "func": + +def evaluate_expression( + f_evt: str, + f_hit: str, + f_dsp: str, + chns: list, + mode: str, + expr: str, + para: dict = None, + defv=np.nan, + nrows: int = None, +) -> np.ndarray: + """ + Evaluates the expression defined by the user across all channels according to the mode + Parameters + ---------- + f_evt + Path to event tier file + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + List of channel names across which expression gets evaluated (form: "ch") + mode + The mode determines how the event entry is calculated across channels. Options are: + - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. + - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event. + - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. + - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. + - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. + - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. + - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. + expr + The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + para + Dictionary of parameters defined in the "parameters" field in the configuration JSON file. + getch + Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. 
+ """ + # define dimension of output array + n = ( + nrows + if nrows is not None + else store.LH5Store().read_n_rows(chns[0] + "/dsp/", f_dsp) + ) + out = np.full(n, defv, dtype=type(defv)) + out_chs = np.zeros(len(out), dtype=int) + outt = np.zeros(len(out)) + + # find parameters in evt file or in parameters + exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) + var_ph = {} + if os.path.exists(f_evt): + var_ph = store.load_nda( + f_evt, + [e.split("/")[-1] for e in store.ls(f_evt) if e.split("/")[-1] in exprl], + ) + if para: + var_ph = var_ph | para + + if mode == "func": # evaluate expression - func, params = expr.split('(') - params = [f_hit,f_dsp,chns]+[num_and_pars(e,var_ph) for e in params[:-1].split(',')] + func, params = expr.split("(") + params = [f_hit, f_dsp, chns] + [ + num_and_pars(e, var_ph) for e in params[:-1].split(",") + ] # load function dynamically - p,m = func.rsplit('.',1) - met = getattr(import_module(p),m) + p, m = func.rsplit(".", 1) + met = getattr(import_module(p), m) out = met(*params) - else: + else: # evaluate operator in mode - ops = re.findall(r'([<>]=?|==)', mode) + ops = re.findall(r"([<>]=?|==)", mode) ch_comp = None if os.path.exists(f_evt) and mode in store.ls(f_evt): - ch_comp = store.load_nda(f_evt,[mode])[mode] + ch_comp = store.load_nda(f_evt, [mode])[mode] + for ch in chns: # find fields in either dsp, hit - var = store.load_nda(f_hit,[e.split('/')[-1] for e in store.ls(f_hit,ch+"/hit/") if e.split('/')[-1] in exprl],ch+"/hit/") - dsp_dic = store.load_nda(f_dsp,[e.split('/')[-1] for e in store.ls(f_dsp,ch+"/dsp/") if e.split('/')[-1] in exprl],ch+"/dsp/") - var= var |dsp_dic | var_ph - + var = store.load_nda( + f_hit, + [ + e.split("/")[-1] + for e in store.ls(f_hit, ch + "/hit/") + if e.split("/")[-1] in exprl + ], + ch + "/hit/", + ) + dsp_dic = store.load_nda( + f_dsp, + [ + e.split("/")[-1] + for e in store.ls(f_dsp, ch + "/dsp/") + if e.split("/")[-1] in exprl + ], + ch + "/dsp/", + ) + var = dsp_dic | var_ph | var + # evaluate expression - res = eval(expr,var) - if not isinstance(res, np.ndarray): res = np.full(len(out),res,dtype=type(res)) + res = eval(expr, var) + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) # append to out according to mode - if len(ops)>0: limarr = eval("".join(["res",ops[0],"lim"]),{"res":res,"lim":float(mode.split(ops[0])[-1])}) - else: limarr = np.ones(len(out)).astype(bool) + if len(ops) > 0: + limarr = eval( + "".join(["res", ops[0], "lim"]), + {"res": res, "lim": float(mode.split(ops[0])[-1])}, + ) + else: + limarr = np.ones(len(out)).astype(bool) if "first" in mode: - outt = np.full(len(out),np.inf) - t0 = store.load_nda(f_dsp,["tp_0_est"],ch+"/dsp/")["tp_0_est"] - out = np.where((t0outt) & (limarr),res,out) - out_chs = np.where((t0>outt) & (limarr),int(ch[2:]),out_chs) - outt = np.where((t0>outt) & (limarr),t0,outt) + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/")["tp_0_est"] + out = np.where((t0 > outt) & (limarr), res, out) + out_chs = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs) + outt = np.where((t0 > outt) & (limarr), t0, outt) elif "tot" in mode: - if res.dtype == bool: res = res.astype(int) - out += np.where(limarr,res,out) + if res.dtype == bool: + res = res.astype(int) + out += np.where(limarr, res, out) elif mode == "any": - if res.dtype != bool: res = res.astype(bool) - out = out | res + if res.dtype != bool: + res = res.astype(bool) + out = out | res elif mode == "all": - if res.dtype != bool: res = res.astype(bool) - out = out & res + if res.dtype 
!= bool: + res = res.astype(bool) + out = out & res elif ch_comp is not None: - out = np.where(int(ch[2:]) == ch_comp,res,out) + out = np.where(int(ch[2:]) == ch_comp, res, out) else: raise ValueError(mode + " not a valid mode") - return out, out_chs - + return out, out_chs + + def build_evt( - f_dsp: str, - f_hit: str, - f_evt: str, - meta_path: str = None, - evt_config: str | dict = None, - wo_mode: str = "write_safe" + f_dsp: str, + f_hit: str, + f_evt: str, + meta_path: str = None, + evt_config: str | dict = None, + wo_mode: str = "write_safe", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -155,8 +204,8 @@ def build_evt( f_evt name of the output file evt_config - dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) seperated by underscores (e.g. "meta_geds_on") in the "channels" dictonary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: - + dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: + .. 
code-block::json { @@ -175,24 +224,28 @@ def build_evt( "channels": ["geds_on","geds_no_psd","geds_ac"], "mode": "first>25", "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" }, "aoe":{ "channels": ["geds_on"], "mode": "energy_id", - "expression": "AoE_Classifier" + "expression": "AoE_Classifier", + "initial": "np.nan" }, "is_muon_tagged":{ "channels": "muon", "mode": "any", "expression": "wf_max>a", - "parameters": {"a":15100} + "parameters": {"a":15100}, + "initial": false }, "multiplicity":{ "channels": ["geds_on","geds_no_psd","geds_ac"], "mode": "tot", "expression": "cuspEmax_ctc_cal > a", - "parameters": {"a":25} + "parameters": {"a":25}, + "initial": 0 }, "lar_energy":{ "channels": "spms_on", @@ -204,75 +257,110 @@ def build_evt( """ lstore = store.LH5Store() tbl_cfg = evt_config - if isinstance(tbl_cfg,str): + if isinstance(tbl_cfg, str): with open(tbl_cfg) as f: - tbl_cfg = json.load(f) - + tbl_cfg = json.load(f) + # create channel list according to config - # This can be either read from the meta data + # This can be either read from the meta data # or a list of channel names log.debug("Creating channel dictionary") - if meta_path: lmeta = LegendMetadata(path=meta_path) - else: lmeta = LegendMetadata() - chmap = lmeta.channelmap(re.search("\d{8}T\d{6}Z",f_dsp).group(0)) + if meta_path: + lmeta = LegendMetadata(path=meta_path) + else: + lmeta = LegendMetadata() + chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) chns = {} - for k, v in tbl_cfg['channels'].items(): - if isinstance(v,str): + for k, v in tbl_cfg["channels"].items(): + if isinstance(v, str): if "meta" in v: - m,sys,usa = v.split("_",2) - tmp = [f"ch{e}" for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]['system'] == sys] - chns[k] = [e for e in tmp if chmap.map("daq.rawid")[int(e[2:])]['analysis']['usability'] == usa] + m, sys, usa = v.split("_", 2) + tmp = [ + f"ch{e}" + for e in chmap.map("daq.rawid") + if chmap.map("daq.rawid")[e]["system"] == sys + ] + chns[k] = [ + e + for e in tmp + if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] + == usa + ] else: - chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] - elif isinstance(v,list): - chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] - + chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] + elif isinstance(v, list): + chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] # do operations - first_iter,nrows = True,None - log.info(f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}") - for k, v in tbl_cfg['operations'].items(): - log.debug("Processing field" + k) + first_iter, nrows = True, None + log.info( + f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}" + ) + for k, v in tbl_cfg["operations"].items(): + log.debug("Processing field" + k) - # if channels not defined in operation, it can only be an operation on the evt level. - if 'channels' not in v.keys(): - exprl = re.findall(r"[a-zA-Z_$][\w$]*",v["expression"]) + # if channels not defined in operation, it can only be an operation on the evt level. 
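+        # note: eval() in this branch sees only previously written evt-tier
+        # fields plus the user-supplied "parameters" as its namespace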
+ if "channels" not in v.keys(): + exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} - if os.path.exists(f_evt):var = store.load_nda(f_evt,[e.split('/')[-1] for e in store.ls(f_evt) if e.split('/')[-1] in exprl]) - if "parameters" in v.keys(): var = var | v['parameters'] - res = Array(eval(v["expression"],var)) + if os.path.exists(f_evt): + var = store.load_nda( + f_evt, + [ + e.split("/")[-1] + for e in store.ls(f_evt) + if e.split("/")[-1] in exprl + ], + ) + if "parameters" in v.keys(): + var = var | v["parameters"] + res = Array(eval(v["expression"], var)) lstore.write_object( obj=res, - name= k, + name=k, lh5_file=f_evt, - wo_mode=wo_mode #if first_iter else "append" + wo_mode=wo_mode, # if first_iter else "append" ) - else: - if isinstance(v['channels'],str): chns_e = chns[v['channels']] - elif isinstance(v['channels'],list): chns_e = list(itertools.chain.from_iterable( [chns[e] for e in v['channels']])) + else: + if isinstance(v["channels"], str): + chns_e = chns[v["channels"]] + elif isinstance(v["channels"], list): + chns_e = list( + itertools.chain.from_iterable([chns[e] for e in v["channels"]]) + ) - pars, defaultv = None , np.nan - if "parameters" in v.keys(): pars = v['parameters'] - if "initial" in v.keys() and not v['initial'] == "np.nan" : defaultv = v['initial'] + pars, defaultv = None, np.nan + if "parameters" in v.keys(): + pars = v["parameters"] + if "initial" in v.keys() and not v["initial"] == "np.nan": + defaultv = v["initial"] - res,chs = evaluate_expression(f_evt,f_hit,f_dsp,chns_e,v['mode'],v['expression'],pars,defaultv,nrows) - lstore.write_object( - obj=Array(res), - name= k, - lh5_file=f_evt, - wo_mode=wo_mode + res, chs = evaluate_expression( + f_evt, + f_hit, + f_dsp, + chns_e, + v["mode"], + v["expression"], + pars, + defaultv, + nrows, ) + lstore.write_object(obj=Array(res), name=k, lh5_file=f_evt, wo_mode=wo_mode) - # if get_ch true flag in a first/last mode operation also obtain channel field - if "get_ch" in v.keys() and ("first" in v['mode'] or "last" in v['mode']) and v["get_ch"]: + # if get_ch true flag in a first/last mode operation also obtain channel field + if ( + "get_ch" in v.keys() + and ("first" in v["mode"] or "last" in v["mode"]) + and v["get_ch"] + ): lstore.write_object( - obj=Array(chs), - name= k+"_id", - lh5_file=f_evt, - wo_mode=wo_mode + obj=Array(chs), name=k + "_id", lh5_file=f_evt, wo_mode=wo_mode ) - if first_iter: first_iter = False - if not nrows: nrows = len(res) + if first_iter: + first_iter = False + if not nrows: + nrows = len(res) - log.info("Done") \ No newline at end of file + log.info("Done") diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 04956bec6..4722ded87 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -1,6 +1,3 @@ -import numpy as np -import pygama.lgdo.lh5_store as store - """ Module for special event level routines for SiPMs @@ -10,74 +7,182 @@ - list of channels processed additional parameters are free to the user and need to be defined in the JSON """ -#get LAr energy per event over all channels -def get_energy(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax): - trig = np.where(np.isnan(trgr),tdefault,trgr) + +import numpy as np + +import pygama.lgdo.lh5_store as store + + +# get LAr energy per event over all channels +def get_energy(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) for ch in chs: - df 
=store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/")
-        mask = (df["trigger_pos"]<tma[:,None]/16) & (df["trigger_pos"]>tmi[:,None]/16) & (df["energy_in_pe"] > lim)
-        pes=df["energy_in_pe"]
-        pes= np.where(np.isnan(pes), 0, pes)
-        pes= np.where(mask,pes,0)
-        chsum= np.nansum(pes, axis=1)
+        df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/")
+        mask = (
+            (df["trigger_pos"] < tma[:, None] / 16)
+            & (df["trigger_pos"] > tmi[:, None] / 16)
+            & (df["energy_in_pe"] > lim)
+        )
+        pes = df["energy_in_pe"]
+        pes = np.where(np.isnan(pes), 0, pes)
+        pes = np.where(mask, pes, 0)
+        chsum = np.nansum(pes, axis=1)
         sum = sum + chsum
     return sum
 
-#get LAr majority per event over all channels
-def get_majority(f_hit,f_dsp,chs,lim,trgr,tdefault,tmin,tmax):
-    trig = np.where(np.isnan(trgr),tdefault,trgr)
+
+# get LAr majority per event over all channels
+def get_majority(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax):
+    trig = np.where(np.isnan(trgr), tdefault, trgr)
     tmi = trig - tmin
     tma = trig + tmax
     maj = np.zeros(len(trig))
     for ch in chs:
-        df =store.load_nda(f_hit, ["energy_in_pe","is_valid_hit",'trigger_pos'],ch+"/hit/")
-        mask = (df["trigger_pos"]<tma[:,None]/16) & (df["trigger_pos"]>tmi[:,None]/16) & (df["energy_in_pe"] > lim)
-        pes=df["energy_in_pe"]
-        pes= np.where(np.isnan(pes), 0, pes)
-        pes= np.where(mask,pes,0)
-        chsum= np.nansum(pes, axis=1)
-        chmaj = np.where(chsum>lim,1,0)
+        df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/")
+        mask = (
+            (df["trigger_pos"] < tma[:, None] / 16)
+            & (df["trigger_pos"] > tmi[:, None] / 16)
+            & (df["energy_in_pe"] > lim)
+        )
+        pes = df["energy_in_pe"]
+        pes = np.where(np.isnan(pes), 0, pes)
+        pes = np.where(mask, pes, 0)
+        chsum = np.nansum(pes, axis=1)
+        chmaj = np.where(chsum > lim, 1, 0)
         maj = maj + chmaj
     return maj
+
+
+# get LAr energy per event over all channels
+def get_energy_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax):
+    trig = np.where(np.isnan(trgr), tdefault, trgr)
+    tmi = trig - tmin
+    tma = trig + tmax
+    sum = np.zeros(len(trig))
+    for ch in chs:
+        df = store.load_nda(
+            f_hit, ["energy_in_pe_dplms", 
"trigger_pos_dplms"], ch + "/hit/" + ) + mask = ( + (df["trigger_pos_dplms"] < tma[:, None] / 16) + & (df["trigger_pos_dplms"] > tmi[:, None] / 16) + & (df["energy_in_pe_dplms"] > lim) + ) + pes = df["energy_in_pe_dplms"] + pes = np.where(np.isnan(pes), 0, pes) + pes = np.where(mask, pes, 0) + chsum = np.nansum(pes, axis=1) + sum = sum + chsum + return sum + + +# get LAr majority per event over all channels +def get_majority_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + trig = np.where(np.isnan(trgr), tdefault, trgr) + tmi = trig - tmin + tma = trig + tmax + maj = np.zeros(len(trig)) + for ch in chs: + df = store.load_nda( + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + ) + mask = ( + (df["trigger_pos_dplms"] < tma[:, None] / 16) + & (df["trigger_pos_dplms"] > tmi[:, None] / 16) + & (df["energy_in_pe_dplms"] > lim) + ) + pes = df["energy_in_pe_dplms"] + pes = np.where(np.isnan(pes), 0, pes) + pes = np.where(mask, pes, 0) + chsum = np.nansum(pes, axis=1) + chmaj = np.where(chsum > lim, 1, 0) + maj = maj + chmaj + return maj + + +def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): + predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") + + peshape = (predf["energy_in_pe"]).shape + # 1D = channel, 2D = event num, 3D = array per event + pes = np.zeros([len(chs), peshape[0], peshape[1]]) + times = np.zeros([len(chs), peshape[0], peshape[1]]) + + tge = np.where(np.isnan(trgr), tdefault, trgr) + tmi = tge - tmin + tma = tge + tmax + for i in range(len(chs)): + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/" + ) + mask = ( + (df["trigger_pos"] < tma[:, None] / 16) + & (df["trigger_pos"] > tmi[:, None] / 16) + & (df["energy_in_pe"] > lim) + ) + pe = df["energy_in_pe"] + time = df["trigger_pos"] * 16 + + pe = np.where(mask, pe, np.nan) + time = np.where(mask, time, np.nan) + + pes[i] = pe + times[i] = time + + outi = None + if trail > 0: + t1d = np.nanmin(times, axis=(0, 2)) + if trail == 2: + t1d[t1d > tge] = tge[t1d > tge] + tt = t1d[:, None] + outi = np.where( + np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)) > 0, + np.nansum( + np.where((times >= tt) & (times < tt + swin), pes, 0), axis=(0, 2) + ) + / np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), + np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), + ) + return outi + + else: + outi = np.where( + np.nansum(pes, axis=(0, 2)) > 0, + np.nansum( + np.where( + (times >= tge[:, None]) & (times <= tge[:, None] + swin), pes, 0 + ), + axis=(0, 2), + ) + / np.nansum(np.where((times >= tge[:, None]), pes, 0), axis=(0, 2)), + np.nansum(pes, axis=(0, 2)), + ) + return outi + + +def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): + predf = store.load_nda(f_hit, ["energy_in_pe"], chs[0] + "/hit/") + peshape = (predf["energy_in_pe"]).shape + times = np.zeros([len(chs), peshape[0], peshape[1]]) + + tge = np.where(np.isnan(trgr), tdefault, trgr) + tmi = tge - tmin + tma = tge + tmax + for i in range(len(chs)): + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/") + mask = ( + (df["trigger_pos"] < tma[:, None] / 16) + & (df["trigger_pos"] > tmi[:, None] / 16) + & (df["energy_in_pe"] > lim) + ) + + time = df["trigger_pos"] * 16 + time = np.where(mask, time, np.nan) + times[i] = time + + t1d = np.nanmin(times, axis=(0, 2)) + + return t1d - tge From 90d3516e986738f92bb59644447926a9e937ff57 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 9 Oct 2023 17:04:43 
+0200 Subject: [PATCH 093/191] Add TCM event building --- src/pygama/evt/build_evt.py | 74 +++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 92af205ca..2caa7c022 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -38,20 +38,23 @@ def num_and_pars(value: str, par_dic: dict): def evaluate_expression( + f_tcm: str, f_evt: str, f_hit: str, f_dsp: str, chns: list, mode: str, expr: str, + nrows: int, para: dict = None, defv=np.nan, - nrows: int = None, ) -> np.ndarray: """ Evaluates the expression defined by the user across all channels according to the mode Parameters ---------- + f_tcm + Path to tcm tier file f_evt Path to event tier file f_hit @@ -77,12 +80,7 @@ def evaluate_expression( Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. """ # define dimension of output array - n = ( - nrows - if nrows is not None - else store.LH5Store().read_n_rows(chns[0] + "/dsp/", f_dsp) - ) - out = np.full(n, defv, dtype=type(defv)) + out = np.full(nrows, defv, dtype=type(defv)) out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) @@ -115,8 +113,17 @@ def evaluate_expression( ch_comp = None if os.path.exists(f_evt) and mode in store.ls(f_evt): ch_comp = store.load_nda(f_evt, [mode])[mode] + + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] + # cl = nda['cumulative_length'] for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] + # find fields in either dsp, hit var = store.load_nda( f_hit, @@ -126,6 +133,7 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/hit/", + idx_ch ) dsp_dic = store.load_nda( f_dsp, @@ -135,48 +143,54 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/dsp/", + idx_ch ) var = dsp_dic | var_ph | var # evaluate expression res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly if not isinstance(res, np.ndarray): res = np.full(len(out), res, dtype=type(res)) - # append to out according to mode + # get unification condition if present in mode if len(ops) > 0: limarr = eval( "".join(["res", ops[0], "lim"]), {"res": res, "lim": float(mode.split(ops[0])[-1])}, ) else: - limarr = np.ones(len(out)).astype(bool) + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode if "first" in mode: if ch == chns[0]: outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/")["tp_0_est"] - out = np.where((t0 < outt) & (limarr), res, out) - out_chs = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs) - outt = np.where((t0 < outt) & (limarr), t0, outt) + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) elif "last" in mode: - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/")["tp_0_est"] - out = np.where((t0 > outt) & (limarr), res, out) - out_chs = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs) - outt = np.where((t0 > outt) & (limarr), t0, outt) + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 > outt) & (limarr), res, 
out[idx_ch]) + out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) elif "tot" in mode: if res.dtype == bool: res = res.astype(int) - out += np.where(limarr, res, out) + out[idx_ch] = np.where(limarr, res+out[idx_ch], out[idx_ch]) elif mode == "any": if res.dtype != bool: res = res.astype(bool) - out = out | res + out[idx_ch] = out[idx_ch] | res elif mode == "all": if res.dtype != bool: res = res.astype(bool) - out = out & res + out[idx_ch] = out[idx_ch] & res elif ch_comp is not None: - out = np.where(int(ch[2:]) == ch_comp, res, out) + out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) else: raise ValueError(mode + " not a valid mode") @@ -184,6 +198,7 @@ def evaluate_expression( def build_evt( + f_tcm: str, f_dsp: str, f_hit: str, f_evt: str, @@ -197,6 +212,8 @@ def build_evt( Parameters ---------- + f_tcm + input LH5 file of the tcm level f_dsp input LH5 file of the dsp level f_hit @@ -292,9 +309,12 @@ def build_evt( chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] # do operations - first_iter, nrows = True, None + first_iter = True + + # get number of rows from TCM file + nrows = len(store.load_nda(f_tcm,['cumulative_length'],'hardware_tcm_1/')['cumulative_length']) log.info( - f"Applying'{len(tbl_cfg['operations'].keys())} operations' to dsp file {f_dsp} and hit file {f_hit} to create evt file {f_evt}" + f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) @@ -336,15 +356,16 @@ def build_evt( defaultv = v["initial"] res, chs = evaluate_expression( + f_tcm, f_evt, f_hit, f_dsp, chns_e, v["mode"], v["expression"], - pars, - defaultv, nrows, + pars, + defaultv ) lstore.write_object(obj=Array(res), name=k, lh5_file=f_evt, wo_mode=wo_mode) @@ -360,7 +381,4 @@ def build_evt( if first_iter: first_iter = False - if not nrows: - nrows = len(res) - log.info("Done") From 21cfba12c13ded819689903b84eb4a3eb679cb0e Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 9 Oct 2023 18:04:45 +0200 Subject: [PATCH 094/191] adapted spm module to tcm based event building --- src/pygama/evt/build_evt.py | 2 +- src/pygama/evt/modules/spm.py | 77 ++++++++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 2caa7c022..114d8c14a 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -98,7 +98,7 @@ def evaluate_expression( if mode == "func": # evaluate expression func, params = expr.split("(") - params = [f_hit, f_dsp, chns] + [ + params = [f_hit, f_dsp, f_tcm, chns] + [ num_and_pars(e, var_ph) for e in params[:-1].split(",") ] diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 4722ded87..5f6ba4e05 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -4,6 +4,7 @@ functions must take as the first 3 args in order: - path to the hit file - path to the dsp file +- path to the tcm file - list of channels processed additional parameters are free to the user and need to be defined in the JSON """ @@ -14,13 +15,19 @@ # get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax sum = 
np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/") + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -30,18 +37,24 @@ def get_energy(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) - sum = sum + chsum + sum[idx_ch] = sum[idx_ch] + chsum return sum # get LAr majority per event over all channels -def get_majority(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/") + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -52,19 +65,25 @@ def get_majority(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) - maj = maj + chmaj + maj[idx_ch] = maj[idx_ch] + chmaj return maj # get LAr energy per event over all channels -def get_energy_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch ) mask = ( (df["trigger_pos_dplms"] < tma[:, None] / 16) @@ -75,19 +94,25 @@ def get_energy_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) - sum = sum + chsum + sum[idx_ch] = sum[idx_ch] + chsum return sum # get LAr majority per event over all channels -def get_majority_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): trig = np.where(np.isnan(trgr), tdefault, trgr) tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for ch in chs: + # get index list for this channel to be loaded + idx_ch = idx[ids==int(ch[2:])] df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/" + f_hit, 
["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/",idx_ch ) mask = ( (df["trigger_pos_dplms"] < tma[:, None] / 16) @@ -99,11 +124,11 @@ def get_majority_dplms(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) - maj = maj + chmaj + maj[idx_ch] = maj[idx_ch] + chmaj return maj -def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): +def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") peshape = (predf["energy_in_pe"]).shape @@ -114,9 +139,16 @@ def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): tge = np.where(np.isnan(trgr), tdefault, trgr) tmi = tge - tmin tma = tge + tmax + + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for i in range(len(chs)): + # get index list for this channel to be loaded + idx_ch = idx[ids==int(chs[i][2:])] df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/" + f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/", idx_ch ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) @@ -129,8 +161,8 @@ def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): pe = np.where(mask, pe, np.nan) time = np.where(mask, time, np.nan) - pes[i] = pe - times[i] = time + pes[i][idx_ch] = pe + times[i][idx_ch] = time outi = None if trail > 0: @@ -163,7 +195,7 @@ def get_etc(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): return outi -def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): +def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): predf = store.load_nda(f_hit, ["energy_in_pe"], chs[0] + "/hit/") peshape = (predf["energy_in_pe"]).shape times = np.zeros([len(chs), peshape[0], peshape[1]]) @@ -171,8 +203,15 @@ def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): tge = np.where(np.isnan(trgr), tdefault, trgr) tmi = tge - tmin tma = tge + tmax + + # load TCM data to define an event + nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') + ids =nda['array_id'] + idx =nda['array_idx'] for i in range(len(chs)): - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/") + # get index list for this channel to be loaded + idx_ch = idx[ids==int(chs[i][2:])] + df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/",idx_ch) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -181,7 +220,7 @@ def get_time_shift(f_hit, f_dsp, chs, lim, trgr, tdefault, tmin, tmax): time = df["trigger_pos"] * 16 time = np.where(mask, time, np.nan) - times[i] = time + times[i][idx_ch] = time t1d = np.nanmin(times, axis=(0, 2)) From 7ff9151006b8fbc7a58e7cbff08f288671a192e5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 11 Oct 2023 14:30:12 +0200 Subject: [PATCH 095/191] add lh5 group parameter --- src/pygama/evt/build_evt.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 114d8c14a..df922c9f0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -46,6 +46,7 @@ def evaluate_expression( mode: str, expr: str, nrows: int, + group: str, para: dict = None, 
defv=np.nan, ) -> np.ndarray: @@ -74,10 +75,16 @@ def evaluate_expression( - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. expr The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + nrows + Number of rows to be processed. + group + lh5 root group name para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. getch Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. + defv + default value of evaluation """ # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -90,7 +97,8 @@ def evaluate_expression( if os.path.exists(f_evt): var_ph = store.load_nda( f_evt, - [e.split("/")[-1] for e in store.ls(f_evt) if e.split("/")[-1] in exprl], + [e.split("/")[-1] for e in store.ls(f_evt,group) if e.split("/")[-1] in exprl], + group ) if para: var_ph = var_ph | para @@ -111,8 +119,8 @@ def evaluate_expression( # evaluate operator in mode ops = re.findall(r"([<>]=?|==)", mode) ch_comp = None - if os.path.exists(f_evt) and mode in store.ls(f_evt): - ch_comp = store.load_nda(f_evt, [mode])[mode] + if os.path.exists(f_evt) and mode in [e.split("/")[-1] for e in store.ls(f_evt,group)]: + ch_comp = store.load_nda(f_evt, [mode],group)[mode] # load TCM data to define an event nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') @@ -205,6 +213,7 @@ def build_evt( meta_path: str = None, evt_config: str | dict = None, wo_mode: str = "write_safe", + group: str = "/evt/", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -271,6 +280,10 @@ def build_evt( } } } + wo_mode + writing mode + group + lh5 root group name """ lstore = store.LH5Store() tbl_cfg = evt_config @@ -328,16 +341,17 @@ def build_evt( f_evt, [ e.split("/")[-1] - for e in store.ls(f_evt) + for e in store.ls(f_evt,group) if e.split("/")[-1] in exprl ], + group ) if "parameters" in v.keys(): var = var | v["parameters"] res = Array(eval(v["expression"], var)) lstore.write_object( obj=res, - name=k, + name=group+k, lh5_file=f_evt, wo_mode=wo_mode, # if first_iter else "append" ) @@ -364,10 +378,11 @@ def build_evt( v["mode"], v["expression"], nrows, + group, pars, defaultv ) - lstore.write_object(obj=Array(res), name=k, lh5_file=f_evt, wo_mode=wo_mode) + lstore.write_object(obj=Array(res), name=group+k, lh5_file=f_evt, wo_mode=wo_mode) # if get_ch true flag in a first/last mode operation also obtain channel field if ( @@ -376,7 +391,7 @@ def build_evt( and v["get_ch"] ): lstore.write_object( - obj=Array(chs), name=k + "_id", lh5_file=f_evt, wo_mode=wo_mode + obj=Array(chs), name=group+k + "_id", lh5_file=f_evt, wo_mode=wo_mode ) if first_iter: From 099baeb78a5c3120cf2d00c8a90ec84bbda2a7e2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 11 Oct 2023 14:37:43 +0200 Subject: [PATCH 096/191] ignoring useless numpy warnings in sipm module --- src/pygama/evt/build_evt.py | 71 +++++++++++++++++++++++------------ src/pygama/evt/modules/spm.py | 6 ++- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 
df922c9f0..5908c0a9a 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -97,8 +97,12 @@ def evaluate_expression( if os.path.exists(f_evt): var_ph = store.load_nda( f_evt, - [e.split("/")[-1] for e in store.ls(f_evt,group) if e.split("/")[-1] in exprl], - group + [ + e.split("/")[-1] + for e in store.ls(f_evt, group) + if e.split("/")[-1] in exprl + ], + group, ) if para: var_ph = var_ph | para @@ -119,18 +123,20 @@ def evaluate_expression( # evaluate operator in mode ops = re.findall(r"([<>]=?|==)", mode) ch_comp = None - if os.path.exists(f_evt) and mode in [e.split("/")[-1] for e in store.ls(f_evt,group)]: - ch_comp = store.load_nda(f_evt, [mode],group)[mode] - + if os.path.exists(f_evt) and mode in [ + e.split("/")[-1] for e in store.ls(f_evt, group) + ]: + ch_comp = store.load_nda(f_evt, [mode], group)[mode] + # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] # cl = nda['cumulative_length'] for ch in chns: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] + idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit var = store.load_nda( @@ -141,7 +147,7 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/hit/", - idx_ch + idx_ch, ) dsp_dic = store.load_nda( f_dsp, @@ -151,7 +157,7 @@ def evaluate_expression( if e.split("/")[-1] in exprl ], ch + "/dsp/", - idx_ch + idx_ch, ) var = dsp_dic | var_ph | var @@ -171,24 +177,32 @@ def evaluate_expression( ) else: limarr = np.ones(len(res)).astype(bool) - + # append to out according to mode if "first" in mode: if ch == chns[0]: outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ + "tp_0_est" + ] out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + out_chs[idx_ch] = np.where( + (t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch] + ) outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) elif "last" in mode: - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/",idx_ch)["tp_0_est"] + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ + "tp_0_est" + ] out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + out_chs[idx_ch] = np.where( + (t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch] + ) outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) elif "tot" in mode: if res.dtype == bool: res = res.astype(int) - out[idx_ch] = np.where(limarr, res+out[idx_ch], out[idx_ch]) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) elif mode == "any": if res.dtype != bool: res = res.astype(bool) @@ -325,7 +339,11 @@ def build_evt( first_iter = True # get number of rows from TCM file - nrows = len(store.load_nda(f_tcm,['cumulative_length'],'hardware_tcm_1/')['cumulative_length']) + nrows = len( + store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ + "cumulative_length" + ] + ) log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) @@ -341,17 +359,17 @@ def build_evt( f_evt, [ e.split("/")[-1] - for e in store.ls(f_evt,group) + for e in store.ls(f_evt, 
group) if e.split("/")[-1] in exprl ], - group + group, ) if "parameters" in v.keys(): var = var | v["parameters"] res = Array(eval(v["expression"], var)) lstore.write_object( obj=res, - name=group+k, + name=group + k, lh5_file=f_evt, wo_mode=wo_mode, # if first_iter else "append" ) @@ -380,9 +398,11 @@ def build_evt( nrows, group, pars, - defaultv + defaultv, + ) + lstore.write_object( + obj=Array(res), name=group + k, lh5_file=f_evt, wo_mode=wo_mode ) - lstore.write_object(obj=Array(res), name=group+k, lh5_file=f_evt, wo_mode=wo_mode) # if get_ch true flag in a first/last mode operation also obtain channel field if ( @@ -391,7 +411,10 @@ def build_evt( and v["get_ch"] ): lstore.write_object( - obj=Array(chs), name=group+k + "_id", lh5_file=f_evt, wo_mode=wo_mode + obj=Array(chs), + name=group + k + "_id", + lh5_file=f_evt, + wo_mode=wo_mode, ) if first_iter: diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 5f6ba4e05..14debb043 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -10,7 +10,7 @@ """ import numpy as np - +import warnings import pygama.lgdo.lh5_store as store @@ -129,6 +129,10 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): + # ignore stupid numpy warnings + warnings.filterwarnings('ignore', r'All-NaN slice encountered') + warnings.filterwarnings('ignore', r'invalid value encountered in true_divide') + predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") peshape = (predf["energy_in_pe"]).shape From 598e33bb5db641e1fc2b1116205a76cf202c6088 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 25 Oct 2023 11:19:57 +0200 Subject: [PATCH 097/191] relocated modes into own functions --- src/pygama/evt/build_evt.py | 475 +++++++++++++++++++++++++++------- src/pygama/evt/modules/spm.py | 77 +++--- 2 files changed, 428 insertions(+), 124 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5908c0a9a..e2eb8df60 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -49,7 +49,7 @@ def evaluate_expression( group: str, para: dict = None, defv=np.nan, -) -> np.ndarray: +) -> dict: """ Evaluates the expression defined by the user across all channels according to the mode Parameters @@ -86,11 +86,6 @@ def evaluate_expression( defv default value of evaluation """ - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - out_chs = np.zeros(len(out), dtype=int) - outt = np.zeros(len(out)) - # find parameters in evt file or in parameters exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} @@ -118,105 +113,397 @@ def evaluate_expression( p, m = func.rsplit(".", 1) met = getattr(import_module(p), m) out = met(*params) + return {"values": out} else: - # evaluate operator in mode + # evaluate possible operator in mode ops = re.findall(r"([<>]=?|==)", mode) - ch_comp = None - if os.path.exists(f_evt) and mode in [ - e.split("/")[-1] for e in store.ls(f_evt, group) - ]: - ch_comp = store.load_nda(f_evt, [mode], group)[mode] + op, mode_lim = None, None + if len(ops) == 1: + op = ops[0] + mode_lim = float(mode.split(op)[-1]) + elif len(ops) > 1: + raise ValueError(mode + " contains invalid operator") # load TCM data to define an event nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") ids = nda["array_id"] idx = nda["array_idx"] - # cl = nda['cumulative_length'] - for ch in chns: 
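
# -- illustrative aside, not from the patch: what the TCM arrays encode --
# Each TCM row maps one hit to an event: array_id is the channel rawid and
# array_idx the row in that channel's hit/dsp table. A minimal sketch with
# assumed toy values (rawids and indices are made up):
#
#   import numpy as np
#   ids = np.array([1104000, 1104001, 1104000])  # array_id per hit
#   idx = np.array([0, 0, 1])                    # array_idx per hit
#   idx_ch = idx[ids == 1104000]                 # -> array([0, 1])
#
# idx_ch is then passed to load_nda so that only the rows of ch1104000 that
# participate in events are read, keeping arrays aligned across channels.
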
- # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - # find fields in either dsp, hit - var = store.load_nda( + # switch through modes + if "first" in mode: + return evaluate_to_first( + idx, + ids, + f_hit, + f_dsp, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, + defv, + ) + elif "last" in mode: + return evaluate_to_last( + idx, + ids, f_hit, - [ - e.split("/")[-1] - for e in store.ls(f_hit, ch + "/hit/") - if e.split("/")[-1] in exprl - ], - ch + "/hit/", - idx_ch, + f_dsp, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, + defv, ) - dsp_dic = store.load_nda( + elif "tot" in mode: + return evaluate_to_tot( + idx, + ids, + f_hit, f_dsp, - [ - e.split("/")[-1] - for e in store.ls(f_dsp, ch + "/dsp/") - if e.split("/")[-1] in exprl - ], - ch + "/dsp/", - idx_ch, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, + defv, + ) + elif "any" == mode: + return evaluate_to_any( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + ) + elif "all" == mode: + return evaluate_to_all( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + ) + elif os.path.exists(f_evt) and mode in [ + e.split("/")[-1] for e in store.ls(f_evt, group) + ]: + ch_comp = store.load_nda(f_evt, [mode], group)[mode] + return evaluate_at_channel( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, ch_comp, var_ph, defv ) - var = dsp_dic | var_ph | var - # evaluate expression - res = eval(expr, var) + else: + raise ValueError(mode + " not a valid mode") + + +def find_parameters( + f_hit: str, f_dsp: str, ch: str, idx_ch: np.ndarray, exprl: list +) -> dict: + # find fields in either dsp, hit + var = store.load_nda( + f_hit, + [ + e.split("/")[-1] + for e in store.ls(f_hit, ch + "/hit/") + if e.split("/")[-1] in exprl + ], + ch + "/hit/", + idx_ch, + ) + dsp_dic = store.load_nda( + f_dsp, + [ + e.split("/")[-1] + for e in store.ls(f_dsp, ch + "/dsp/") + if e.split("/")[-1] in exprl + ], + ch + "/dsp/", + idx_ch, + ) + return dsp_dic | var - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - # get unification condition if present in mode - if len(ops) > 0: - limarr = eval( - "".join(["res", ops[0], "lim"]), - {"res": res, "lim": float(mode.split(ops[0])[-1])}, - ) - else: - limarr = np.ones(len(res)).astype(bool) - - # append to out according to mode - if "first" in mode: - if ch == chns[0]: - outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ - "tp_0_est" - ] - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where( - (t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch] - ) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - elif "last" in mode: - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)[ - "tp_0_est" - ] - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where( - (t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch] - ) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) - elif "tot" in mode: - if res.dtype == bool: - res = res.astype(int) - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) - elif mode == "any": - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] | res - elif mode == "all": - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res - elif ch_comp is not None: - out[idx_ch] = 
np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) - else: - raise ValueError(mode + " not a valid mode") +def evaluate_to_first( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + out_chs = np.zeros(len(out), dtype=int) + outt = np.zeros(len(out)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) - return out, out_chs + # append to out according to mode == first + if ch == chns[0]: + outt[:] = np.inf + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) + + return {"values": out, "channels": out_chs} + + +def evaluate_to_last( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + out_chs = np.zeros(len(out), dtype=int) + outt = np.zeros(len(out)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode == last + t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + + return {"values": out, "channels": out_chs} + + +def evaluate_to_tot( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is 
not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode == tot + if res.dtype == bool: + res = res.astype(int) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + + return {"values": out} + + +def evaluate_to_any( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # append to out according to mode == any + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] | res + + return {"values": out} + + +def evaluate_to_all( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # append to out according to mode == all + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] & res + + return {"values": out} + + +def evaluate_at_channel( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + expr: str, + exprl: list, + nrows: int, + ch_comp: np.ndarray, + var_ph: dict = None, + defv=np.nan, +) -> dict: + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # append to out according to mode == any + out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) + + return {"values": out} + + +def evaluate_to_vector( + f_tcm: str, + f_evt: str, + f_hit: str, + f_dsp: str, + chns: list, + mode: str, + expr: str, + nrows: int, + group: str, + para: dict = None, + defv=np.nan, +) -> dict: + raise NotImplementedError def build_evt( @@ -373,6 +660,8 @@ def build_evt( lh5_file=f_evt, wo_mode=wo_mode, # if first_iter else "append" ) + + # Else we build the event entry else: if isinstance(v["channels"], str): 
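
# -- illustrative aside, not from the patch: resolving the "channels" key --
# A single group name selects one channel list; a list of group names is
# flattened into one list, as in the branch below. Sketch with assumed toy
# groups:
#
#   import itertools
#   chns = {"geds_on": ["ch1104000", "ch1104001"], "spms_on": ["ch1057600"]}
#   v = {"channels": ["geds_on", "spms_on"]}
#   chns_e = list(itertools.chain.from_iterable([chns[e] for e in v["channels"]]))
#   # -> ["ch1104000", "ch1104001", "ch1057600"]
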
chns_e = chns[v["channels"]] @@ -387,7 +676,7 @@ def build_evt( if "initial" in v.keys() and not v["initial"] == "np.nan": defaultv = v["initial"] - res, chs = evaluate_expression( + result = evaluate_expression( f_tcm, f_evt, f_hit, @@ -401,7 +690,10 @@ def build_evt( defaultv, ) lstore.write_object( - obj=Array(res), name=group + k, lh5_file=f_evt, wo_mode=wo_mode + obj=Array(result["values"]), + name=group + k, + lh5_file=f_evt, + wo_mode=wo_mode, ) # if get_ch true flag in a first/last mode operation also obtain channel field @@ -409,9 +701,10 @@ def build_evt( "get_ch" in v.keys() and ("first" in v["mode"] or "last" in v["mode"]) and v["get_ch"] + and "channels" in result.keys() ): lstore.write_object( - obj=Array(chs), + obj=Array(result["channels"]), name=group + k + "_id", lh5_file=f_evt, wo_mode=wo_mode, diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 14debb043..8ef727381 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -9,9 +9,11 @@ additional parameters are free to the user and need to be defined in the JSON """ -import numpy as np import warnings -import pygama.lgdo.lh5_store as store + +import numpy as np + +import lgdo.lh5_store as store # get LAr energy per event over all channels @@ -21,13 +23,15 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) + idx_ch = idx[ids == int(ch[2:])] + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -48,13 +52,15 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/",idx_ch) + idx_ch = idx[ids == int(ch[2:])] + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) @@ -76,12 +82,12 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] + idx_ch = idx[ids == int(ch[2:])] df = store.load_nda( f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", 
idx_ch ) @@ -105,14 +111,14 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for ch in chs: # get index list for this channel to be loaded - idx_ch = idx[ids==int(ch[2:])] + idx_ch = idx[ids == int(ch[2:])] df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/",idx_ch + f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch ) mask = ( (df["trigger_pos_dplms"] < tma[:, None] / 16) @@ -130,8 +136,8 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): # ignore stupid numpy warnings - warnings.filterwarnings('ignore', r'All-NaN slice encountered') - warnings.filterwarnings('ignore', r'invalid value encountered in true_divide') + warnings.filterwarnings("ignore", r"All-NaN slice encountered") + warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") @@ -145,14 +151,17 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for i in range(len(chs)): # get index list for this channel to be loaded - idx_ch = idx[ids==int(chs[i][2:])] + idx_ch = idx[ids == int(chs[i][2:])] df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos", "timestamp"], chs[i] + "/hit/", idx_ch + f_hit, + ["energy_in_pe", "trigger_pos", "timestamp"], + chs[i] + "/hit/", + idx_ch, ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) @@ -209,13 +218,15 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm,['array_id','array_idx'],'hardware_tcm_1/') - ids =nda['array_id'] - idx =nda['array_idx'] + nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") + ids = nda["array_id"] + idx = nda["array_idx"] for i in range(len(chs)): # get index list for this channel to be loaded - idx_ch = idx[ids==int(chs[i][2:])] - df = store.load_nda(f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/",idx_ch) + idx_ch = idx[ids == int(chs[i][2:])] + df = store.load_nda( + f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/", idx_ch + ) mask = ( (df["trigger_pos"] < tma[:, None] / 16) & (df["trigger_pos"] > tmi[:, None] / 16) From ad8f2f810d74f54da4e56e24030a4a6edaf1954b Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 27 Oct 2023 13:30:07 +0200 Subject: [PATCH 098/191] add vov output option --- src/pygama/evt/build_evt.py | 190 +++++++++++++++++++++++++++------- src/pygama/evt/modules/spm.py | 3 +- 2 files changed, 154 insertions(+), 39 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e2eb8df60..e67694e4e 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -10,11 +10,10 @@ import re from importlib import 
import_module +import lgdo.lh5_store as store import numpy as np from legendmeta import LegendMetadata - -import pygama.lgdo.lh5_store as store -from pygama.lgdo import Array +from lgdo import Array, VectorOfVectors log = logging.getLogger(__name__) @@ -71,8 +70,8 @@ def evaluate_expression( - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1. - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0. - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. - - "single": !!!NOT IMPLEMENTED!!!. Channels are not combined, but result saved for each channel. field name gets channel id as suffix. + - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. if ch_field is a VectorOfVectors, the channel list is ignored. If ch_field is an Array, the intersection of the passed channels list and the Array is formed. If a channel is not in the Array, the default is used. + - "vov": Channels are not combined, but result saved as VectorOfVectors. Use of getch is recommended. It is possible (and recommended) to add a condition (e.g. "vov>10"). Only channels fulfilling this condition are saved. expr The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. nrows @@ -81,8 +80,6 @@ def evaluate_expression( lh5 root group name para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. - getch - Only affects "first", "last" modes. In that cases the rawid of the resulting values channel is returned as well. 
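
# -- illustrative aside, not from the patch: how expressions are evaluated --
# Hit/dsp fields named in the expression are loaded into numpy arrays and
# handed to Python's eval() together with the "parameters" dict. A minimal
# sketch with assumed toy values:
#
#   import numpy as np
#   var = {"cuspEmax_ctc_cal": np.array([10.0, 42.0, np.nan]), "a": 25}
#   res = eval("cuspEmax_ctc_cal > a", var)  # -> array([False, True, False])
#
# Every name that re.findall(r"[a-zA-Z_$][\w$]*", expr) extracts must resolve
# to a loaded field or a parameter, otherwise eval() raises a NameError.
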
defv default value of evaluation """ @@ -176,6 +173,10 @@ def evaluate_expression( var_ph, defv, ) + elif "vov" in mode: + return evaluate_to_vector( + idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, mode_lim, op, var_ph + ) elif "any" == mode: return evaluate_to_any( idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv @@ -187,10 +188,21 @@ def evaluate_expression( elif os.path.exists(f_evt) and mode in [ e.split("/")[-1] for e in store.ls(f_evt, group) ]: - ch_comp = store.load_nda(f_evt, [mode], group)[mode] - return evaluate_at_channel( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, ch_comp, var_ph, defv - ) + lstore = store.LH5Store() + ch_comp, _ = lstore.read_object(group + mode, f_evt) + if isinstance(ch_comp, Array): + return evaluate_at_channel( + idx, ids, f_hit, f_dsp, chns, expr, exprl, ch_comp, var_ph, defv + ) + elif isinstance(ch_comp, VectorOfVectors): + return evaluate_at_channel_vov( + idx, ids, f_hit, f_dsp, expr, exprl, ch_comp, var_ph + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) else: raise ValueError(mode + " not a valid mode") @@ -461,13 +473,11 @@ def evaluate_at_channel( chns: list, expr: str, exprl: list, - nrows: int, - ch_comp: np.ndarray, + ch_comp: Array, var_ph: dict = None, defv=np.nan, ) -> dict: - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) + out = np.full(len(ch_comp), defv, dtype=type(defv)) for ch in chns: # get index list for this channel to be loaded @@ -484,26 +494,120 @@ def evaluate_at_channel( if not isinstance(res, np.ndarray): res = np.full(len(out), res, dtype=type(res)) - # append to out according to mode == any - out[idx_ch] = np.where(int(ch[2:]) == ch_comp, res, out[idx_ch]) + out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) return {"values": out} +def evaluate_at_channel_vov( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + expr: str, + exprl: list, + ch_comp: VectorOfVectors, + var_ph: dict = None, +) -> dict: + # blow up vov to aoesa + out = ch_comp.to_aoesa().nda + + chns = np.unique(out[~np.isnan(out)]).astype(int) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == ch] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # see in which events the current channel is present + mask = (out == ch).any(axis=1) + out[out == ch] = res[mask] + + # ok now implode the table again + out = VectorOfVectors( + flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(res.dtype), + cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), + ) + return {"values": out, "channels": ch_comp} + + def evaluate_to_vector( - f_tcm: str, - f_evt: str, + idx: np.ndarray, + ids: np.ndarray, f_hit: str, f_dsp: str, chns: list, - mode: str, expr: str, + exprl: list, nrows: int, - group: str, - para: dict = None, - defv=np.nan, + mode_lim: int | float, + op: str = None, + var_ph: dict = None, ) -> dict: - raise NotImplementedError + """ + Allows the evaluation as a vector of vectors. 
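
# -- illustrative aside, not from the patch: the implode step used here --
# Results are first collected in an (events x channels) array padded with
# NaN and then imploded into a jagged VectorOfVectors, as in
# evaluate_at_channel_vov above. Minimal sketch with toy values:
#
#   import numpy as np
#   from lgdo import VectorOfVectors
#   out = np.array([[25.1, np.nan], [np.nan, np.nan], [30.2, 27.4]])
#   vov = VectorOfVectors(
#       flattened_data=out.flatten()[~np.isnan(out.flatten())],              # [25.1, 30.2, 27.4]
#       cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)),  # [1, 1, 3]
#   )
#   # vov holds [[25.1], [], [30.2, 27.4]] -- one variable-length row per event
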
+ Returns a dictionary of values: VoV of requested values + and channels: VoV of same dimensions with requested channel_id + """ + # raise NotImplementedError + + # define dimension of output array + out = np.full((nrows, len(chns)), np.nan) + out_chs = np.full((nrows, len(chns)), np.nan) + + i = 0 + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval(expr, var) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get unification condition if present in mode + if op is not None: + limarr = eval( + "".join(["res", op, "lim"]), + {"res": res, "lim": mode_lim}, + ) + else: + limarr = np.ones(len(res)).astype(bool) + + # append to out according to mode == vov + out[:, i][limarr] = res[limarr] + out_chs[:, i][limarr] = int(ch[2:]) + + i += 1 + + # This can be smarter + # shorten to vov (FUTURE: replace with awkward) + out = VectorOfVectors( + flattened_data=out.flatten()[~np.isnan(out.flatten())], + cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), + ) + out_chs = VectorOfVectors( + flattened_data=out_chs.flatten()[~np.isnan(out_chs.flatten())].astype(int), + cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out_chs), axis=1)), + ) + + return {"values": out, "channels": out_chs} def build_evt( @@ -531,7 +635,7 @@ def build_evt( f_evt name of the output file evt_config - dictionary or name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression For example: + name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: .. 
code-block::json @@ -554,6 +658,12 @@ def build_evt( "expression": "cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_on":{ + "channels": ["geds_on"], + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, "aoe":{ "channels": ["geds_on"], "mode": "energy_id", @@ -637,8 +747,9 @@ def build_evt( for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) - # if channels not defined in operation, it can only be an operation on the evt level. - if "channels" not in v.keys(): + # if mode not defined in operation, it can only be an operation on the evt level. + # TODO need to adapt to handle VoVs + if "mode" not in v.keys(): exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} if os.path.exists(f_evt): @@ -663,7 +774,9 @@ def build_evt( # Else we build the event entry else: - if isinstance(v["channels"], str): + if "channels" not in v.keys(): + chns_e = [] + elif isinstance(v["channels"], str): chns_e = chns[v["channels"]] elif isinstance(v["channels"], list): chns_e = list( @@ -689,22 +802,25 @@ def build_evt( pars, defaultv, ) + + obj = result["values"] + if isinstance(obj, np.ndarray): + obj = Array(result["values"]) lstore.write_object( - obj=Array(result["values"]), + obj=obj, name=group + k, lh5_file=f_evt, wo_mode=wo_mode, ) - # if get_ch true flag in a first/last mode operation also obtain channel field - if ( - "get_ch" in v.keys() - and ("first" in v["mode"] or "last" in v["mode"]) - and v["get_ch"] - and "channels" in result.keys() - ): + # if get_ch flag is true and exists and result dic contains channels entry + # write also channels information + if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): + obj = result["channels"] + if isinstance(obj, np.ndarray): + obj = Array(result["channels"]) lstore.write_object( - obj=Array(result["channels"]), + obj=obj, name=group + k + "_id", lh5_file=f_evt, wo_mode=wo_mode, diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 8ef727381..5e7584e04 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -11,9 +11,8 @@ import warnings -import numpy as np - import lgdo.lh5_store as store +import numpy as np # get LAr energy per event over all channels From a301e37ef6d4c5876a53c4852000e393abc0bb33 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 18:47:45 +0200 Subject: [PATCH 099/191] Add tests --- src/pygama/evt/__init__.py | 3 +- src/pygama/evt/build_evt.py | 18 +- src/pygama/evt/modules/__init__.py | 21 +++ src/pygama/evt/modules/spm.py | 1 + tests/evt/configs/basic-evt-config.json | 53 ++++++ tests/evt/configs/module-test-evt-config.json | 39 ++++ tests/evt/configs/vov-test-evt-config.json | 24 +++ tests/evt/test_build_evt.py | 166 ++++++++++++++++++ 8 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 src/pygama/evt/modules/__init__.py create mode 100644 tests/evt/configs/basic-evt-config.json create mode 100644 tests/evt/configs/module-test-evt-config.json create mode 100644 tests/evt/configs/vov-test-evt-config.json create mode 100644 tests/evt/test_build_evt.py diff --git a/src/pygama/evt/__init__.py b/src/pygama/evt/__init__.py index 8257a98e3..80b544455 100644 --- a/src/pygama/evt/__init__.py +++ b/src/pygama/evt/__init__.py @@ -2,7 +2,8 @@ Utilities for grouping hit data into events. 
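
# -- illustrative aside, not from the patch: calling the exported build_evt --
# With build_evt re-exported from pygama.evt below, a call following the
# signature added in this series looks like this (file names are hypothetical
# placeholders):
#
#   from pygama.evt import build_evt
#
#   build_evt(
#       f_tcm="key-tier_tcm.lh5",
#       f_dsp="key-tier_dsp.lh5",
#       f_hit="key-tier_hit.lh5",
#       f_evt="key-tier_evt.lh5",
#       evt_config="evt-config.json",
#       wo_mode="write_safe",
#       group="/evt/",
#   )
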
""" +from .build_evt import build_evt from .build_tcm import build_tcm from .tcm import generate_tcm_cols -__all__ = ["build_tcm", "generate_tcm_cols"] +__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"] diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e67694e4e..0f2de86d5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -108,7 +108,7 @@ def evaluate_expression( # load function dynamically p, m = func.rsplit(".", 1) - met = getattr(import_module(p), m) + met = getattr(import_module(p, package=__package__), m) out = met(*params) return {"values": out} @@ -615,8 +615,8 @@ def build_evt( f_dsp: str, f_hit: str, f_evt: str, + evt_config: str | dict, meta_path: str = None, - evt_config: str | dict = None, wo_mode: str = "write_safe", group: str = "/evt/", ) -> None: @@ -635,7 +635,7 @@ def build_evt( f_evt name of the output file evt_config - name of JSON file defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: + name of JSON file or dict defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: .. 
code-block::json @@ -698,10 +698,17 @@ def build_evt( """ lstore = store.LH5Store() tbl_cfg = evt_config + if not isinstance(tbl_cfg, (str, dict)): + raise TypeError() if isinstance(tbl_cfg, str): with open(tbl_cfg) as f: tbl_cfg = json.load(f) + if "channels" not in tbl_cfg.keys(): + raise ValueError("channel field needs to be specified in the config") + if "operations" not in tbl_cfg.keys(): + raise ValueError("operations field needs to be specified in the config") + # create channel list according to config # This can be either read from the meta data # or a list of channel names @@ -712,6 +719,7 @@ def build_evt( lmeta = LegendMetadata() chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) chns = {} + for k, v in tbl_cfg["channels"].items(): if isinstance(v, str): if "meta" in v: @@ -736,6 +744,8 @@ def build_evt( first_iter = True # get number of rows from TCM file + if "hardware_tcm_1" not in store.ls(f_tcm): + raise ValueError(f"TCM {f_tcm} doesn't contain hardware_tcm_1 field.") nrows = len( store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ "cumulative_length" @@ -769,7 +779,7 @@ def build_evt( obj=res, name=group + k, lh5_file=f_evt, - wo_mode=wo_mode, # if first_iter else "append" + wo_mode=wo_mode, ) # Else we build the event entry diff --git a/src/pygama/evt/modules/__init__.py b/src/pygama/evt/modules/__init__.py new file mode 100644 index 000000000..bd80462f8 --- /dev/null +++ b/src/pygama/evt/modules/__init__.py @@ -0,0 +1,21 @@ +""" +Contains submodules for evt processing +""" + +from .spm import ( + get_energy, + get_energy_dplms, + get_etc, + get_majority, + get_majority_dplms, + get_time_shift, +) + +__all__ = [ + "get_energy", + "get_majority", + "get_energy_dplms", + "get_majority_dplms", + "get_etc", + "get_time_shift", +] diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 5e7584e04..b43bf134d 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -137,6 +137,7 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra # ignore stupid numpy warnings warnings.filterwarnings("ignore", r"All-NaN slice encountered") warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") + warnings.filterwarnings("ignore", r"invalid value encountered in divide") predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json new file mode 100644 index 000000000..8c41913e1 --- /dev/null +++ b/tests/evt/configs/basic-evt-config.json @@ -0,0 +1,53 @@ +{ + "channels": { + "geds_on": ["V00048A", "V01240A", "V00048B"] + }, + "operations": { + "multiplicity": { + "channels": "geds_on", + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 + }, + "energy": { + "channels": "geds_on", + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "energy_aux": { + "channels": "geds_on", + "mode": "last>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "energy_sum": { + "channels": "geds_on", + "mode": "tot>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": 0.0 + }, + "is_usable_aoe": { + "channels": "geds_on", + "mode": "energy_id", + "expression": "True", + "initial": false + }, + "aoe": { + "channels": "geds_on", + "mode": "energy_id", + "expression": "AoE_Classifier", + "initial": "np.nan" + }, + 
"is_aoe_rejected": { + "channels": "geds_on", + "mode": "energy_id", + "expression": "~(AoE_Double_Sided_Cut)", + "initial": false + } + } +} diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json new file mode 100644 index 000000000..e5500c9cb --- /dev/null +++ b/tests/evt/configs/module-test-evt-config.json @@ -0,0 +1,39 @@ +{ + "channels": { + "spms_on": ["S024", "S036", "S012"], + "geds_on": ["V00048A", "V01240A", "V00048B"] + }, + "operations": { + "energy_first": { + "channels": ["geds_on"], + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "t0": { + "channels": ["geds_on"], + "mode": "energy_first_id", + "expression": "tp_0_est", + "initial": 0.0 + }, + "lar_energy": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + }, + "is_lar_rejected": { + "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + }, + "lar_classifier": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + } + } +} diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json new file mode 100644 index 000000000..24b195ded --- /dev/null +++ b/tests/evt/configs/vov-test-evt-config.json @@ -0,0 +1,24 @@ +{ + "channels": { + "geds_on": ["V00048A", "V01240A", "V00048B"] + }, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "aoe": { + "mode": "energy_id", + "expression": "AoE_Classifier" + }, + "multiplicity": { + "channels": "geds_on", + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 + } + } +} diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py new file mode 100644 index 000000000..3a182d8c0 --- /dev/null +++ b/tests/evt/test_build_evt.py @@ -0,0 +1,166 @@ +import os +from pathlib import Path + +import lgdo.lh5_store as store +import numpy as np +import pytest +from lgdo import Array, VectorOfVectors, load_nda, ls + +from pygama.evt import build_evt + +config_dir = Path(__file__).parent / "configs" + + +def test_basics(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=lgnd_test_data.get_path("legend/metadata"), + evt_config=f"{config_dir}/basic-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert ( + len(ls(outfile, "/evt/")) == 9 + ) # 7 operations of which 2 are requesting channel field + nda = load_nda( + outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" + ) + assert ( + nda["energy"][nda["multiplicity"] == 1] + == nda["energy_aux"][nda["multiplicity"] == 1] + ).all() + assert ( + nda["energy"][nda["multiplicity"] == 1] + == nda["energy_sum"][nda["multiplicity"] == 1] + ).all() + assert ( + nda["energy_aux"][nda["multiplicity"] == 1] + == 
nda["energy_sum"][nda["multiplicity"] == 1] + ).all() + + +def test_lar_module(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=lgnd_test_data.get_path("legend/metadata"), + evt_config=f"{config_dir}/module-test-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 7 + assert ( + np.max(load_nda(outfile, ["lar_multiplicity"], "/evt/")["lar_multiplicity"]) + <= 3 + ) + + +def test_vov(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=lgnd_test_data.get_path("legend/metadata"), + evt_config=f"{config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 4 + lstore = store.LH5Store() + vov_ene, _ = lstore.read_object("/evt/energy", outfile) + vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) + arr_ac, _ = lstore.read_object("/evt/multiplicity", outfile) + assert isinstance(vov_ene, VectorOfVectors) + assert isinstance(vov_aoe, VectorOfVectors) + assert isinstance(arr_ac, Array) + assert (np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all() + + +def test_graceful_crashing(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + f_tcm = lgnd_test_data.get_path(tcm_path) + f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) + f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) + meta_path = lgnd_test_data.get_path("legend/metadata") + f_config = f"{config_dir}/basic-evt-config.json" + + with pytest.raises(ValueError): + build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) + + with pytest.raises(NameError): + build_evt(f_tcm, f_hit, f_dsp, outfile, f_config, meta_path) + + with pytest.raises(TypeError): + build_evt(f_tcm, f_dsp, f_hit, outfile, None, meta_path) + + conf = {"operations": {}} + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = {"channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}} + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = { + "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "operations": {}, + } + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + assert not os.path.exists(outfile) + + conf = { + "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "first>pineapple", + "get_ch": True, + 
"expression": "cuspEmax_ctc_cal", + "initial": "np.nan", + } + }, + } + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = { + "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "first>25", + "get_ch": True, + "expression": "cuspEmax_ctc_cal$cuspEmax_ctc_cal", + "initial": "np.nan", + } + }, + } + with pytest.raises(SyntaxError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) From 00d89304f9e1b5a9f6fdd301c1cad0db21d52f34 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 20:23:59 +0200 Subject: [PATCH 100/191] automatically load legendmeta on meta keyword in config --- src/pygama/evt/build_evt.py | 128 +++++++++--------- tests/evt/configs/basic-evt-config.json | 2 +- tests/evt/configs/module-test-evt-config.json | 4 +- tests/evt/configs/vov-test-evt-config.json | 2 +- tests/evt/test_build_evt.py | 16 +-- 5 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 0f2de86d5..8cd07da5f 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -12,7 +12,6 @@ import lgdo.lh5_store as store import numpy as np -from legendmeta import LegendMetadata from lgdo import Array, VectorOfVectors log = logging.getLogger(__name__) @@ -51,6 +50,7 @@ def evaluate_expression( ) -> dict: """ Evaluates the expression defined by the user across all channels according to the mode + Parameters ---------- f_tcm @@ -77,12 +77,13 @@ def evaluate_expression( nrows Number of rows to be processed. group - lh5 root group name + lh5 root group name para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. defv - default value of evaluation + default value of evaluation """ + # find parameters in evt file or in parameters exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} @@ -639,63 +640,65 @@ def build_evt( .. 
code-block::json - { - "channels": { - "geds_on": "meta_geds_on", - "geds_no_psd": "meta_geds_no_psd", - "geds_ac": "meta_geds_ac", - "spms_on": "meta_spms_on", - "pulser": "PULS01", - "baseline": "BSLN01", - "muon": "MUON01", - "ts_master":"S060" - }, - "operations": { - "energy":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "first>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan" - }, - "energy_on":{ - "channels": ["geds_on"], - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" - }, - "aoe":{ - "channels": ["geds_on"], - "mode": "energy_id", - "expression": "AoE_Classifier", - "initial": "np.nan" - }, - "is_muon_tagged":{ - "channels": "muon", - "mode": "any", - "expression": "wf_max>a", - "parameters": {"a":15100}, - "initial": false + { + "channels": { + "geds_on": "meta_geds_on", + "geds_no_psd": "meta_geds_no_psd", + "geds_ac": "meta_geds_ac", + "spms_on": "meta_spms_on", + "pulser": "PULS01", + "baseline": "BSLN01", + "muon": "MUON01", + "ts_master":"S060" }, - "multiplicity":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", - "parameters": {"a":25}, - "initial": 0 - }, - "lar_energy":{ - "channels": "spms_on", - "mode": "func", - "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)" + "operations": { + "energy":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "mode": "first>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan" + }, + "energy_on":{ + "channels": ["geds_on"], + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "aoe":{ + "channels": ["geds_on"], + "mode": "energy_id", + "expression": "AoE_Classifier", + "initial": "np.nan" + }, + "is_muon_tagged":{ + "channels": "muon", + "mode": "any", + "expression": "wf_max>a", + "parameters": {"a":15100}, + "initial": false + }, + "multiplicity":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "mode": "tot", + "expression": "cuspEmax_ctc_cal > a", + "parameters": {"a":25}, + "initial": 0 + }, + "lar_energy":{ + "channels": "spms_on", + "mode": "func", + "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)" + } } } - } + wo_mode writing mode group lh5 root group name """ + lstore = store.LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): @@ -713,16 +716,18 @@ def build_evt( # This can be either read from the meta data # or a list of channel names log.debug("Creating channel dictionary") - if meta_path: - lmeta = LegendMetadata(path=meta_path) - else: - lmeta = LegendMetadata() - chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) + chns = {} for k, v in tbl_cfg["channels"].items(): if isinstance(v, str): + # only import legend meta data when needed. 
+ # LEGEND collaborators can use the meta keyword + # This way it still works for users w/o access to the LEGEND meta data if "meta" in v: + lm = import_module("legendmeta") + lmeta = lm.LegendMetadata(path=meta_path) + chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) m, sys, usa = v.split("_", 2) tmp = [ f"ch{e}" @@ -736,6 +744,8 @@ def build_evt( == usa ] else: - chns[k] = [f"ch{chmap.map('name')[v]['daq']['rawid']}"] + chns[k] = [v] elif isinstance(v, list): - chns[k] = [f"ch{chmap.map('name')[e]['daq']['rawid']}" for e in v] + chns[k] = [e for e in v] - - # do operations - first_iter = True # get number of rows from TCM file + if "hardware_tcm_1" not in store.ls(f_tcm): + raise ValueError(f"TCM {f_tcm} doesn't contain hardware_tcm_1 field.") nrows = len( store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ "cumulative_length" ] @@ -836,6 +838,4 @@ def build_evt( wo_mode=wo_mode, ) - if first_iter: - first_iter = False log.info("Done") diff --git a/tests/evt/configs/basic-evt-config.json index 8c41913e1..5e0b2e662 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -1,6 +1,6 @@ { "channels": { - "geds_on": ["V00048A", "V01240A", "V00048B"] + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, "operations": { "multiplicity": { diff --git a/tests/evt/configs/module-test-evt-config.json index e5500c9cb..7c60f3d80 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -1,7 +1,7 @@ { "channels": { - "spms_on": ["S024", "S036", "S012"], - "geds_on": ["V00048A", "V01240A", "V00048B"] + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, "operations": { "energy_first": { diff --git a/tests/evt/configs/vov-test-evt-config.json index 24b195ded..a02c7da7c 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -1,6 +1,6 @@ { "channels": { - "geds_on": ["V00048A", "V01240A", "V00048B"] + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, "operations": { "energy": { diff --git a/tests/evt/test_build_evt.py index 3a182d8c0..65ddb9996 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -21,7 +21,7 @@ def test_basics(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=lgnd_test_data.get_path("legend/metadata"), + meta_path=None, evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", group="/evt/", ) @@ -58,7 +58,7 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=lgnd_test_data.get_path("legend/metadata"), + meta_path=None, evt_config=f"{config_dir}/module-test-evt-config.json", wo_mode="o", group="/evt/", ) @@ -82,7 +82,7 @@ def test_vov(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=lgnd_test_data.get_path("legend/metadata"), + meta_path=None, evt_config=f"{config_dir}/vov-test-evt-config.json", wo_mode="o", group="/evt/", ) @@ -108,7 +108,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_tcm = lgnd_test_data.get_path(tcm_path) f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) f_hit = 
lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - meta_path = lgnd_test_data.get_path("legend/metadata") + meta_path = None f_config = f"{config_dir}/basic-evt-config.json" with pytest.raises(ValueError): @@ -124,19 +124,19 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = {"channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}} + conf = {"channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}} with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) conf = { - "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": {}, } build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) assert not os.path.exists(outfile) conf = { - "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": { "energy": { "channels": "geds_on", @@ -151,7 +151,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) conf = { - "channels": {"geds_on": ["V00048A", "V01240A", "V00048B"]}, + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": { "energy": { "channels": "geds_on", From 648cb8f5f56af7a7e077e050be801f1815208062 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 21:48:23 +0200 Subject: [PATCH 101/191] parametrized dsp and hit lh5 root group --- src/pygama/evt/build_evt.py | 169 +++++++++++++++++++++++++++++++----- tests/evt/test_build_evt.py | 5 +- 2 files changed, 149 insertions(+), 25 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 8cd07da5f..d466515ea 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -45,6 +45,8 @@ def evaluate_expression( expr: str, nrows: int, group: str, + dsp_group: str, + hit_group: str, para: dict = None, defv=np.nan, ) -> dict: @@ -78,6 +80,10 @@ def evaluate_expression( Number of rows to be processed. group lh5 root group name + dsp_group + lh5 root group in dsp file + hit_group + lh5 root group in hit file para Dictionary of parameters defined in the "parameters" field in the configuration JSON file. 
defv @@ -134,7 +140,9 @@ def evaluate_expression( idx, ids, f_hit, + hit_group, f_dsp, + dsp_group, chns, expr, exprl, @@ -149,7 +157,9 @@ def evaluate_expression( idx, ids, f_hit, + hit_group, f_dsp, + dsp_group, chns, expr, exprl, @@ -164,7 +174,9 @@ def evaluate_expression( idx, ids, f_hit, + hit_group, f_dsp, + dsp_group, chns, expr, exprl, @@ -176,15 +188,49 @@ def evaluate_expression( ) elif "vov" in mode: return evaluate_to_vector( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, mode_lim, op, var_ph + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + nrows, + mode_lim, + op, + var_ph, ) elif "any" == mode: return evaluate_to_any( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + nrows, + var_ph, + defv, ) elif "all" == mode: return evaluate_to_all( - idx, ids, f_hit, f_dsp, chns, expr, exprl, nrows, var_ph, defv + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + nrows, + var_ph, + defv, ) elif os.path.exists(f_evt) and mode in [ e.split("/")[-1] for e in store.ls(f_evt, group) @@ -193,11 +239,31 @@ def evaluate_expression( ch_comp, _ = lstore.read_object(group + mode, f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( - idx, ids, f_hit, f_dsp, chns, expr, exprl, ch_comp, var_ph, defv + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + ch_comp, + var_ph, + defv, ) elif isinstance(ch_comp, VectorOfVectors): return evaluate_at_channel_vov( - idx, ids, f_hit, f_dsp, expr, exprl, ch_comp, var_ph + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + expr, + exprl, + ch_comp, + var_ph, ) else: raise NotImplementedError( @@ -210,27 +276,33 @@ def evaluate_expression( def find_parameters( - f_hit: str, f_dsp: str, ch: str, idx_ch: np.ndarray, exprl: list + f_hit: str, + f_dsp: str, + ch: str, + idx_ch: np.ndarray, + exprl: list, + dsp_group: str, + hit_group: str, ) -> dict: # find fields in either dsp, hit var = store.load_nda( f_hit, [ e.split("/")[-1] - for e in store.ls(f_hit, ch + "/hit/") + for e in store.ls(f_hit, ch + hit_group) if e.split("/")[-1] in exprl ], - ch + "/hit/", + ch + hit_group, idx_ch, ) dsp_dic = store.load_nda( f_dsp, [ e.split("/")[-1] - for e in store.ls(f_dsp, ch + "/dsp/") + for e in store.ls(f_dsp, ch + dsp_group) if e.split("/")[-1] in exprl ], - ch + "/dsp/", + ch + dsp_group, idx_ch, ) return dsp_dic | var @@ -240,7 +312,9 @@ def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -259,7 +333,10 @@ def evaluate_to_first( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -293,7 +370,9 @@ def evaluate_to_last( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -313,7 +392,10 @@ def evaluate_to_last( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -345,7 +427,9 @@ def evaluate_to_tot( idx: np.ndarray, 
ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -363,7 +447,10 @@ def evaluate_to_tot( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -394,7 +481,9 @@ def evaluate_to_any( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -410,7 +499,10 @@ def evaluate_to_any( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -432,7 +524,9 @@ def evaluate_to_all( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -448,7 +542,10 @@ def evaluate_to_all( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -470,7 +567,9 @@ def evaluate_at_channel( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -485,7 +584,10 @@ def evaluate_at_channel( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -504,7 +606,9 @@ def evaluate_at_channel_vov( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, expr: str, exprl: list, ch_comp: VectorOfVectors, @@ -520,7 +624,12 @@ def evaluate_at_channel_vov( idx_ch = idx[ids == ch] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph + var = ( + find_parameters( + f_hit, f_dsp, f"ch{ch}", idx_ch, exprl, dsp_group, hit_group + ) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -546,7 +655,9 @@ def evaluate_to_vector( idx: np.ndarray, ids: np.ndarray, f_hit: str, + hit_group: str, f_dsp: str, + dsp_group: str, chns: list, expr: str, exprl: list, @@ -572,7 +683,10 @@ def evaluate_to_vector( idx_ch = idx[ids == int(ch[2:])] # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = ( + find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) + | var_ph + ) # evaluate expression res = eval(expr, var) @@ -620,6 +734,9 @@ def build_evt( meta_path: str = None, wo_mode: str = "write_safe", group: str = "/evt/", + tcm_group: str = "/hardware_tcm_1/", + dsp_group: str = "/dsp/", + hit_group: str = "/hit/", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -633,6 +750,7 @@ def build_evt( input LH5 file of the dsp level f_hit input LH5 file of the hit level + f_evt name of the output file evt_config @@ -697,6 +815,12 @@ def build_evt( writing mode group lh5 root group name + tcm_group + lh5 root group in tcm file + dsp_group + lh5 root group in dsp file + hit_group + lh5 root group in hit file """ 
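(For orientation, a minimal call sketch matching the signature above — the file names and config path are illustrative only; the group keywords mirror the defaults and the test usage:

    from pygama.evt import build_evt

    build_evt(
        f_tcm="l200-..._tier_tcm.lh5",   # hypothetical input files
        f_dsp="l200-..._tier_dsp.lh5",
        f_hit="l200-..._tier_hit.lh5",
        f_evt="l200-..._tier_evt.lh5",   # output file
        evt_config="basic-evt-config.json",
        wo_mode="o",
        group="/evt/",
        tcm_group="/hardware_tcm_1/",
        dsp_group="/dsp/",
        hit_group="/hit/",
    )
)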
lstore = store.LH5Store() @@ -745,13 +869,8 @@ def build_evt( elif isinstance(v, list): chns[k] = [e for e in v] - # get number of rows from TCM file - if "hardware_tcm_1" not in store.ls(f_tcm): - raise ValueError(f"TCM {f_tcm} doesn't contain hardware_tcm_1 field.") nrows = len( - store.load_nda(f_tcm, ["cumulative_length"], "hardware_tcm_1/")[ - "cumulative_length" - ] + store.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] ) log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) diff --git a/tests/evt/test_build_evt.py index 65ddb9996..12599f57e 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -25,6 +25,9 @@ def test_basics(lgnd_test_data, tmptestdir): evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", group="/evt/", + tcm_group="hardware_tcm_1", + dsp_group="/dsp/", + hit_group="/hit/", ) assert os.path.exists(outfile) @@ -111,7 +114,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): meta_path = None f_config = f"{config_dir}/basic-evt-config.json" - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) From 086ddc72ca839146e64c572e9a5f7b64d0348ab8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 28 Oct 2023 22:27:33 +0200 Subject: [PATCH 102/191] generalized first and last sorter --- src/pygama/evt/build_evt.py | 110 +++++++++++------- tests/evt/configs/basic-evt-config.json | 18 ++- tests/evt/configs/module-test-evt-config.json | 17 ++- tests/evt/test_build_evt.py | 33 ++++-- 4 files changed, 125 insertions(+), 53 deletions(-) diff --git a/src/pygama/evt/build_evt.py index d466515ea..e26012475 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -41,7 +41,7 @@ def evaluate_expression( f_hit: str, f_dsp: str, chns: list, - mode: str, + mod: str | list, expr: str, nrows: int, group: str, @@ -90,6 +90,12 @@ def evaluate_expression( default value of evaluation """ + # set mode variables + mode, sorter = mod, None + if isinstance(mod, list): + mode = mod[0] + sorter = mod[1] + # find parameters in evt file or in parameters exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} if os.path.exists(f_evt): @@ -135,7 +141,45 @@ def evaluate_expression( idx = nda["array_idx"] # switch through modes - if "first" in mode: + if os.path.exists(f_evt) and mode in [ + e.split("/")[-1] for e in store.ls(f_evt, group) + ]: + lstore = store.LH5Store() + ch_comp, _ = lstore.read_object(group + mode, f_evt) + if isinstance(ch_comp, Array): + return evaluate_at_channel( + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + chns, + expr, + exprl, + ch_comp, + var_ph, + defv, + ) + elif isinstance(ch_comp, VectorOfVectors): + return evaluate_at_channel_vov( + idx, + ids, + f_hit, + hit_group, + f_dsp, + dsp_group, + expr, + exprl, + ch_comp, + var_ph, + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) + elif "first" in mode: return evaluate_to_first( idx, ids, f_hit, hit_group, f_dsp, dsp_group, chns, expr, exprl, nrows, mode_lim, + sorter, op, var_ph, defv, ) elif "last" in mode: return evaluate_to_last( idx, ids, f_hit, hit_group, f_dsp, dsp_group, chns, expr, exprl, nrows, mode_lim, + sorter, op, var_ph, defv, ) elif 
os.path.exists(f_evt) and mode in [ - e.split("/")[-1] for e in store.ls(f_evt, group) - ]: - lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(group + mode, f_evt) - if isinstance(ch_comp, Array): - return evaluate_at_channel( - idx, - ids, - f_hit, - hit_group, - f_dsp, - dsp_group, - chns, - expr, - exprl, - ch_comp, - var_ph, - defv, - ) - elif isinstance(ch_comp, VectorOfVectors): - return evaluate_at_channel_vov( - idx, - ids, - f_hit, - hit_group, - f_dsp, - dsp_group, - expr, - exprl, - ch_comp, - var_ph, - ) - else: - raise NotImplementedError( - type(ch_comp) - + " not supported (only Array and VectorOfVectors are supported)" - ) - else: raise ValueError(mode + " not a valid mode") @@ -320,6 +327,7 @@ def evaluate_to_first( exprl: list, nrows: int, mode_lim: int | float, + sorter: str, op: str = None, var_ph: dict = None, defv=np.nan, @@ -358,7 +366,15 @@ def evaluate_to_first( # append to out according to mode == first if ch == chns[0]: outt[:] = np.inf - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + + # find if sorter is in hit or dsp + if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: + t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] + elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: + t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] + else: + raise ValueError(f"Couldn't find sorter {sorter}") + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) @@ -378,6 +394,7 @@ def evaluate_to_last( exprl: list, nrows: int, mode_lim: int | float, + sorter: str, op: str = None, var_ph: dict = None, defv=np.nan, @@ -415,7 +432,14 @@ def evaluate_to_last( limarr = np.ones(len(res)).astype(bool) # append to out according to mode == last - t0 = store.load_nda(f_dsp, ["tp_0_est"], ch + "/dsp/", idx_ch)["tp_0_est"] + # find if sorter is in hit or dsp + if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: + t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] + elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: + t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] + else: + raise ValueError(f"Couldn't find sorter {sorter}") + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 5e0b2e662..aa0b68456 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -12,14 +12,28 @@ }, "energy": { "channels": "geds_on", - "mode": "first>25", + "mode": ["first>25", "tp_0_est"], "get_ch": true, "expression": "cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_any_above1MeV": { + "channels": "geds_on", + "mode": "any", + "get_ch": true, + "expression": "cuspEmax_ctc_cal>1000", + "initial": false + }, + "energy_all_above1MeV": { + "channels": "geds_on", + "mode": "all", + "get_ch": true, + "expression": "cuspEmax_ctc_cal>1000", + "initial": false + }, "energy_aux": { "channels": "geds_on", - "mode": "last>25", + "mode": ["last>25", "tp_0_est"], "get_ch": true, "expression": "cuspEmax_ctc_cal", "initial": "np.nan" diff --git 
a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 7c60f3d80..8f084034a 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -6,7 +6,7 @@ "operations": { "energy_first": { "channels": ["geds_on"], - "mode": "first>25", + "mode": ["first>25", "tp_0_est"], "get_ch": true, "expression": "cuspEmax_ctc_cal", "initial": "np.nan" @@ -34,6 +34,21 @@ "channels": "spms_on", "mode": "func", "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + }, + "lar_energy_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_time_shift": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 12599f57e..0c6c1bde0 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -32,7 +32,7 @@ def test_basics(lgnd_test_data, tmptestdir): assert os.path.exists(outfile) assert ( - len(ls(outfile, "/evt/")) == 9 + len(ls(outfile, "/evt/")) == 11 ) # 7 operations of which 2 are requesting channel field nda = load_nda( outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" @@ -68,11 +68,15 @@ def test_lar_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 7 - assert ( - np.max(load_nda(outfile, ["lar_multiplicity"], "/evt/")["lar_multiplicity"]) - <= 3 + assert len(ls(outfile, "/evt/")) == 10 + nda = load_nda( + outfile, + ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"], + "/evt/", ) + assert np.max(nda["lar_multiplicity"]) <= 3 + assert np.max(nda["lar_multiplicity_dplms"]) <= 3 + assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all() def test_vov(lgnd_test_data, tmptestdir): @@ -143,7 +147,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): "operations": { "energy": { "channels": "geds_on", - "mode": "first>pineapple", + "mode": ["first>pineapple", "tp_0_est"], "get_ch": True, "expression": "cuspEmax_ctc_cal", "initial": "np.nan", @@ -158,7 +162,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): "operations": { "energy": { "channels": "geds_on", - "mode": "first>25", + "mode": ["first>25", "tp_0_est"], "get_ch": True, "expression": "cuspEmax_ctc_cal$cuspEmax_ctc_cal", "initial": "np.nan", @@ -167,3 +171,18 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): } with pytest.raises(SyntaxError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + conf = { + "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "operations": { + "energy": { + "channels": "geds_on", + "mode": ["first>25", "coconut"], + "get_ch": True, + "expression": "cuspEmax_ctc_cal", + "initial": "np.nan", + } + }, + } + with pytest.raises(ValueError): + build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) From ad9c74e3455a0d1ce70f24a7b42011a2331e41e6 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sun, 29 Oct 2023 10:53:30 +0100 Subject: [PATCH 103/191] allow mix of VoV and array evaluation at evt level --- src/pygama/evt/build_evt.py | 50 +++++++++++++++++----- tests/evt/configs/vov-test-evt-config.json | 9 ++++ tests/evt/test_build_evt.py | 8 +++- 3 files changed, 55 insertions(+), 12 
deletions(-) diff --git a/src/pygama/evt/build_evt.py index e26012475..f0bd4c161 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -903,23 +903,51 @@ def build_evt( log.debug("Processing field" + k) # if mode not defined in operation, it can only be an operation on the evt level. if "mode" not in v.keys(): exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} if os.path.exists(f_evt): - var = store.load_nda( - f_evt, - [ - e.split("/")[-1] - for e in store.ls(f_evt, group) - if e.split("/")[-1] in exprl - ], - group, - ) + flds = [ + e.split("/")[-1] + for e in store.ls(f_evt, group) + if e.split("/")[-1] in exprl + ] + var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} + + # to apply any operations to VoVs we have to blow them up to a table (future change to a more intelligent way) + arr_keys = [] + for key, value in var.items(): + if isinstance(value, VectorOfVectors): + var[key] = value.to_aoesa().nda + elif isinstance(value, Array): + var[key] = value.nda + arr_keys.append(key) + + # now we also need to set dimensions if we have an expression + # consisting of a mix of VoV and Arrays + if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): + for key in arr_keys: + var[key] = var[key][:, None] + if "parameters" in v.keys(): var = var | v["parameters"] - res = Array(eval(v["expression"], var)) + res = eval(v["expression"], var) + + # now check what dimension we have after the evaluation + if len(res.shape) == 1: + res = Array(res) + elif len(res.shape) == 2: + res = VectorOfVectors( + flattened_data=res.flatten()[~np.isnan(res.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(res), axis=1) + ), + ) + else: + raise NotImplementedError( + f"Currently only 2d formats are supported, the evaluated array has dimension {res.shape}" + ) + lstore.write_object( obj=res, name=group + k, diff --git a/tests/evt/configs/vov-test-evt-config.json index a02c7da7c..d1bfc4120 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -19,6 +19,15 @@ "expression": "cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, "initial": 0 + }, + "energy_times_aoe": { + "expression": "energy*aoe" + }, + "energy_times_multiplicity": { + "expression": "energy*multiplicity" + }, + "multiplicity_squared": { + "expression": "multiplicity*multiplicity" } } } diff --git a/tests/evt/test_build_evt.py index 0c6c1bde0..b5a405323 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -96,14 +96,20 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 4 + assert len(ls(outfile, "/evt/")) == 7 lstore = store.LH5Store() vov_ene, _ = lstore.read_object("/evt/energy", outfile) vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) arr_ac, _ = lstore.read_object("/evt/multiplicity", outfile) + vov_aoeene, _ = lstore.read_object("/evt/energy_times_aoe", outfile) + vov_eneac, _ = lstore.read_object("/evt/energy_times_multiplicity", outfile) + arr_ac2, _ = lstore.read_object("/evt/multiplicity_squared", outfile) assert isinstance(vov_ene, VectorOfVectors) assert isinstance(vov_aoe, VectorOfVectors) assert isinstance(arr_ac, Array) + assert isinstance(vov_aoeene, VectorOfVectors) + assert isinstance(vov_eneac, VectorOfVectors) + assert isinstance(arr_ac2, Array) assert 
(np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all() From db0495a43aca905c24810af2866d4c7fafe8e0f2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sun, 29 Oct 2023 12:19:15 +0100 Subject: [PATCH 104/191] add event skimming function --- src/pygama/evt/__init__.py | 4 +- src/pygama/evt/build_evt.py | 90 +++++++++++++++++++++++++++++++++++++ tests/evt/test_build_evt.py | 27 ++++++++++- 3 files changed, 118 insertions(+), 3 deletions(-) diff --git a/src/pygama/evt/__init__.py index 80b544455..8bc8bf058 100644 --- a/src/pygama/evt/__init__.py +++ b/src/pygama/evt/__init__.py @@ -2,8 +2,8 @@ Utilities for grouping hit data into events. """ -from .build_evt import build_evt +from .build_evt import build_evt, skim_evt from .build_tcm import build_tcm from .tcm import generate_tcm_cols -__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"] +__all__ = ["build_tcm", "generate_tcm_cols", "build_evt", "skim_evt"] diff --git a/src/pygama/evt/build_evt.py index f0bd4c161..8febc5f55 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1012,3 +1012,93 @@ def build_evt( ) log.info("Done") + + +def skim_evt( + f_evt: str, + expression: str, + params: dict = None, + f_out: str = None, + wo_mode="n", + evt_group="/evt/", +) -> None: + """ + Skims events from an evt file which fulfill the expression, discards all other events. + + Parameters + ---------- + f_evt + input LH5 file of the evt level + expression + skimming expression. Can contain variables from the event file or from the params dictionary. + params + dictionary of additional parameters used in the expression + f_out + output LH5 file. Can be None if wo_mode is set to overwrite f_evt. + wo_mode + Write mode: "o"/"overwrite" overwrites f_evt. "n"/"new" writes to a new file specified in f_out. + evt_group + lh5 root group of the evt file + """ + + if wo_mode not in ["o", "overwrite", "n", "new"]: + raise ValueError( + wo_mode + + " is an invalid writing mode. Valid options are: 'o', 'overwrite','n','new'" + ) + lstore = store.LH5Store() + fields = store.ls(f_evt, evt_group) + nrows = lstore.read_n_rows(fields[0], f_evt) + # load fields in expression + exprl = re.findall(r"[a-zA-Z_$][\w$]*", expression) + var = {} + + flds = [ + e.split("/")[-1] + for e in store.ls(f_evt, evt_group) + if e.split("/")[-1] in exprl + ] + var = {e: lstore.read_object(evt_group + e, f_evt)[0] for e in flds} + + # to apply any operations to VoVs we have to blow them up to a table (future change to a more intelligent way) + arr_keys = [] + for key, value in var.items(): + if isinstance(value, VectorOfVectors): + var[key] = value.to_aoesa().nda + elif isinstance(value, Array): + var[key] = value.nda + arr_keys.append(key) + + # now we also need to set dimensions if we have an expression + # consisting of a mix of VoV and Arrays + if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): + for key in arr_keys: + var[key] = var[key][:, None] + + if params is not None: + var = var | params + res = eval(expression, var) + + if res.shape != (nrows,): + raise ValueError( + f"The expression must result to 1D with length = event number. 
Current shape is {res.shape}" + ) + + res = res.astype(bool) + idx_list = np.arange(nrows, dtype=int)[res] + + of = f_out + if wo_mode in ["o", "overwrite"]: + of = f_evt + of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) + + for fld in fields: + ob, _ = lstore.read_object(fld, f_evt, idx=idx_list) + lstore.write_object( + obj=ob, + name=fld, + lh5_file=of_tmp, + wo_mode="o", + ) + + if os.path.exists(of): + os.remove(of) + os.rename(of_tmp, of) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index b5a405323..a08848934 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -6,7 +6,7 @@ import pytest from lgdo import Array, VectorOfVectors, load_nda, ls -from pygama.evt import build_evt +from pygama.evt import build_evt, skim_evt config_dir = Path(__file__).parent / "configs" @@ -192,3 +192,28 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): } with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + + +def test_skimming(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + f_tcm = lgnd_test_data.get_path(tcm_path) + f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) + f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) + meta_path = None + f_config = f"{config_dir}/vov-test-evt-config.json" + build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, meta_path) + + lstore = store.LH5Store() + ac = lstore.read_object("/evt/multiplicity", outfile)[0].nda + ac = len(ac[ac == 3]) + + outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + + skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") + assert ac == len(lstore.read_object("/evt/energy", outfile_skm)[0].to_aoesa().nda) + + skim_evt(outfile, "multiplicity == 3", None, None, "o") + assert ac == len(lstore.read_object("/evt/energy", outfile)[0].to_aoesa().nda) From 6e4264a207c7bf7749951a37d4d27921a17f5852 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 30 Oct 2023 16:44:07 +0100 Subject: [PATCH 105/191] allow VoV t0 in spm module --- src/pygama/evt/build_evt.py | 84 ++++++++---------- src/pygama/evt/modules/spm.py | 87 +++++++++++++++++-- .../module-test-t0-vov-evt-config.json | 53 +++++++++++ tests/evt/test_build_evt.py | 27 ++++++ 4 files changed, 195 insertions(+), 56 deletions(-) create mode 100644 tests/evt/configs/module-test-t0-vov-evt-config.json diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 8febc5f55..f606e3774 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -100,15 +100,7 @@ def evaluate_expression( exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) var_ph = {} if os.path.exists(f_evt): - var_ph = store.load_nda( - f_evt, - [ - e.split("/")[-1] - for e in store.ls(f_evt, group) - if e.split("/")[-1] in exprl - ], - group, - ) + var_ph = load_vars_to_nda(f_evt, group, exprl) if para: var_ph = var_ph | para @@ -292,29 +284,41 @@ def find_parameters( hit_group: str, ) -> dict: # find fields in either dsp, hit - var = store.load_nda( - f_hit, - [ - e.split("/")[-1] - for e in store.ls(f_hit, ch + hit_group) - if e.split("/")[-1] in exprl - ], - ch + hit_group, - idx_ch, - ) - dsp_dic = store.load_nda( - f_dsp, - [ - e.split("/")[-1] - for e in store.ls(f_dsp, ch + dsp_group) - if e.split("/")[-1] in 
exprl - ], - ch + dsp_group, - idx_ch, - ) + var = load_vars_to_nda(f_hit, ch + hit_group, exprl) + dsp_dic = load_vars_to_nda(f_dsp, ch + dsp_group, exprl) + return dsp_dic | var +def load_vars_to_nda(f_evt: str, group: str, exprl: list) -> dict: + lstore = store.LH5Store() + flds = [ + e.split("/")[-1] for e in store.ls(f_evt, group) if e.split("/")[-1] in exprl + ] + var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} + + # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) + arr_keys = [] + for key, value in var.items(): + if isinstance(value, VectorOfVectors): + var[key] = value.to_aoesa().nda + elif isinstance(value, Array): + var[key] = value.nda + if var[key].ndim > 2: + raise ValueError("Dim > 2 not supported") + if var[key].ndim == 1: + arr_keys.append(key) + else: + raise ValueError(f"{type(value)} not supported") + + # now we also need to set dimensions if we have an expression + # consisting of a mix of VoV and Arrays + if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): + for key in arr_keys: + var[key] = var[key][:, None] + return var + + def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, @@ -907,27 +911,7 @@ def build_evt( exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) var = {} if os.path.exists(f_evt): - flds = [ - e.split("/")[-1] - for e in store.ls(f_evt, group) - if e.split("/")[-1] in exprl - ] - var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} - - # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) - arr_keys = [] - for key, value in var.items(): - if isinstance(value, VectorOfVectors): - var[key] = value.to_aoesa().nda - elif isinstance(value, Array): - var[key] = value.nda - arr_keys.append(key) - - # now we also need to set dimensions if we have an expression - # consisting of a mix of VoV and Arrays - if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): - for key in arr_keys: - var[key] = var[key][:, None] + var = load_vars_to_nda(f_evt, group, exprl) if "parameters" in v.keys(): var = var | v["parameters"] diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index b43bf134d..7bd530531 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -13,11 +13,24 @@ import lgdo.lh5_store as store import numpy as np +from lgdo import Array, VectorOfVectors # get LAr energy per event over all channels def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) @@ -46,7 +59,19 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): # get LAr majority per event over all channels def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if 
isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) @@ -76,7 +101,19 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): # get LAr energy per event over all channels def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax sum = np.zeros(len(trig)) @@ -105,7 +142,19 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): # get LAr majority per event over all channels def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - trig = np.where(np.isnan(trgr), tdefault, trgr) + trig = trgr + if isinstance(trgr, VectorOfVectors): + trig = trig.to_aoesa().nda + elif isinstance(trgr, Array): + trig = trig.nda + if isinstance(trig, np.ndarray) and trig.ndim == 2: + trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) + trig = np.nanmin(trig, axis=1) + + elif isinstance(trig, np.ndarray) and trig.ndim == 1: + trig = np.where(np.isnan(trig), tdefault, trig) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") tmi = trig - tmin tma = trig + tmax maj = np.zeros(len(trig)) @@ -146,7 +195,20 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra pes = np.zeros([len(chs), peshape[0], peshape[1]]) times = np.zeros([len(chs), peshape[0], peshape[1]]) - tge = np.where(np.isnan(trgr), tdefault, trgr) + tge = trgr + if isinstance(trgr, VectorOfVectors): + tge = tge.to_aoesa().nda + elif isinstance(trgr, Array): + tge = tge.nda + if isinstance(tge, np.ndarray) and tge.ndim == 2: + tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) + tge = np.nanmin(tge, axis=1) + + elif isinstance(tge, np.ndarray) and tge.ndim == 1: + tge = np.where(np.isnan(tge), tdefault, tge) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") + tmi = tge - tmin tma = tge + tmax @@ -213,7 +275,20 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): peshape = (predf["energy_in_pe"]).shape times = np.zeros([len(chs), peshape[0], peshape[1]]) - tge = np.where(np.isnan(trgr), tdefault, trgr) + tge = trgr + if isinstance(trgr, VectorOfVectors): + tge = tge.to_aoesa().nda + elif isinstance(trgr, Array): + tge = tge.nda + if isinstance(tge, np.ndarray) and tge.ndim == 2: + tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) + tge = np.nanmin(tge, axis=1) + + elif isinstance(tge, np.ndarray) and tge.ndim == 1: + tge = np.where(np.isnan(tge), tdefault, tge) + else: + raise ValueError(f"Can't deal with t0 of type {type(trgr)}") + tmi = tge - tmin tma = tge + tmax diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json 
b/tests/evt/configs/module-test-t0-vov-evt-config.json new file mode 100644 index 000000000..436332409 --- /dev/null +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -0,0 +1,53 @@ +{ + "channels": { + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + }, + "operations": { + "energy": { + "channels": "geds_on", + "mode": "vov>25", + "get_ch": true, + "expression": "cuspEmax_ctc_cal" + }, + "t0": { + "channels": ["geds_on"], + "mode": "energy_id", + "expression": "tp_0_est", + "initial": 0.0 + }, + "lar_energy": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + }, + "is_lar_rejected": { + "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + }, + "lar_classifier": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + }, + "lar_energy_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_multiplicity_dplms": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + }, + "lar_time_shift": { + "channels": "spms_on", + "mode": "func", + "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" + } + } +} diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index a08848934..128833e5b 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -79,6 +79,33 @@ def test_lar_module(lgnd_test_data, tmptestdir): assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all() +def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=None, + evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", + wo_mode="o", + group="/evt/", + ) + + assert os.path.exists(outfile) + assert len(ls(outfile, "/evt/")) == 10 + nda = load_nda( + outfile, + ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"], + "/evt/", + ) + assert np.max(nda["lar_multiplicity"]) <= 3 + assert np.max(nda["lar_multiplicity_dplms"]) <= 3 + + def test_vov(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" From f700e34a374904d7c1bf2ab2cc63e2a787d398bd Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 28 Nov 2023 15:54:45 +0100 Subject: [PATCH 106/191] implemented suggestions of luigi: retrieve ids in separate config block, explicit tier naming, field naming changes --- src/pygama/evt/build_evt.py | 566 ++++++++++-------- tests/evt/configs/basic-evt-config.json | 49 +- tests/evt/configs/module-test-evt-config.json | 45 +- .../module-test-t0-vov-evt-config.json | 42 +- tests/evt/configs/query-test-evt-config.json | 88 +++ tests/evt/configs/vov-test-evt-config.json | 26 +- tests/evt/test_build_evt.py |
73 +-- 7 files changed, 531 insertions(+), 358 deletions(-) create mode 100644 tests/evt/configs/query-test-evt-config.json diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index f606e3774..cc1e237e5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -45,9 +45,8 @@ def evaluate_expression( expr: str, nrows: int, group: str, - dsp_group: str, - hit_group: str, para: dict = None, + qry: str = None, defv=np.nan, ) -> dict: """ @@ -74,6 +73,8 @@ def evaluate_expression( - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0. - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. if ch_field is a VectorOfVectors, the channel list is ignored. If ch_field is an Array, the intersection of the passed channels list and the Array is formed. If a channel is not in the Array, the default is used. - "vov": Channels are not combined, but result saved as VectorOfVectors. Use of getch is recommended. It is possible (and recommended) to add a condition (e.g. "vov>10"). Only channels fulfilling this condition are saved. + qry + A query that can set a condition on mode. Can be any tier (i.e. a channelxevents shaped boolean matrix for tiers below event or an events long boolean array at the evt level) expr The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. nrows @@ -94,21 +95,21 @@ def evaluate_expression( mode, sorter = mod, None if isinstance(mod, list): mode = mod[0] - sorter = mod[1] + sorter = mod[1].split(".") # find parameters in evt file or in parameters - exprl = re.findall(r"[a-zA-Z_$][\w$]*", expr) + exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} if os.path.exists(f_evt): - var_ph = load_vars_to_nda(f_evt, group, exprl) + var_ph = load_vars_to_nda(f_evt, "", exprl) if para: var_ph = var_ph | para - if mode == "func": + if mode == "function": # evaluate expression func, params = expr.split("(") params = [f_hit, f_dsp, f_tcm, chns] + [ - num_and_pars(e, var_ph) for e in params[:-1].split(",") + num_and_pars(e.replace(".", "_"), var_ph) for e in params[:-1].split(",") ] # load function dynamically @@ -118,14 +119,18 @@ def evaluate_expression( return {"values": out} else: - # evaluate possible operator in mode - ops = re.findall(r"([<>]=?|==)", mode) - op, mode_lim = None, None - if len(ops) == 1: - op = ops[0] - mode_lim = float(mode.split(op)[-1]) - elif len(ops) > 1: - raise ValueError(mode + " contains invalid operator") + # check if query is either on channel basis or evt basis (and not a mix) + qry_mask = qry + if qry is not None: + if "evt." in qry and ("hit." in qry or "dsp." in qry): + raise ValueError("Query can't be a mix of evt tier and lower tiers.") + + # if it is an evt query we can evaluate it directly here + if os.path.exists(f_evt) and "evt." 
in qry: + var_qry = load_vars_to_nda( + f_evt, "", re.findall(r"(evt).([a-zA-Z_$][\w$]*)", qry) + ) + qry_mask = eval(qry.replace("evt.", "evt_"), var_qry) # load TCM data to define an event nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") @@ -133,19 +138,20 @@ def evaluate_expression( idx = nda["array_idx"] # switch through modes - if os.path.exists(f_evt) and mode in [ - e.split("/")[-1] for e in store.ls(f_evt, group) - ]: + if ( + os.path.exists(f_evt) + and "evt." == mode[:4] + and mode.split(".")[-1] + in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] + ): lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(group + mode, f_evt) + ch_comp, _ = lstore.read_object(mode.replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, @@ -158,9 +164,7 @@ def evaluate_expression( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, expr, exprl, ch_comp, @@ -171,73 +175,62 @@ def evaluate_expression( type(ch_comp) + " not supported (only Array and VectorOfVectors are supported)" ) - elif "first" in mode: + + elif "first" == mode: return evaluate_to_first( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, sorter, - op, var_ph, defv, ) - elif "last" in mode: + elif "last" == mode: return evaluate_to_last( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, sorter, - op, var_ph, defv, ) - elif "tot" in mode: + elif "sum" == mode: return evaluate_to_tot( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, - op, var_ph, defv, ) - elif "vov" in mode: + elif "vov" == mode: return evaluate_to_vector( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, - mode_lim, - op, var_ph, ) elif "any" == mode: @@ -245,12 +238,11 @@ def evaluate_expression( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, var_ph, defv, @@ -260,12 +252,11 @@ def evaluate_expression( idx, ids, f_hit, - hit_group, f_dsp, - dsp_group, chns, expr, exprl, + qry_mask, nrows, var_ph, defv, @@ -280,22 +271,31 @@ def find_parameters( ch: str, idx_ch: np.ndarray, exprl: list, - dsp_group: str, - hit_group: str, ) -> dict: # find fields in either dsp, hit - var = load_vars_to_nda(f_hit, ch + hit_group, exprl) - dsp_dic = load_vars_to_nda(f_dsp, ch + dsp_group, exprl) + var = load_vars_to_nda(f_hit, ch, exprl, idx_ch) + dsp_dic = load_vars_to_nda(f_dsp, ch, exprl, idx_ch) return dsp_dic | var -def load_vars_to_nda(f_evt: str, group: str, exprl: list) -> dict: +def load_vars_to_nda( + f_evt: str, group: str, exprl: list, idx: np.ndarray = None +) -> dict: lstore = store.LH5Store() - flds = [ - e.split("/")[-1] for e in store.ls(f_evt, group) if e.split("/")[-1] in exprl - ] - var = {e: lstore.read_object(group + e, f_evt)[0] for e in flds} + var = { + f"{e[0]}_{e[1]}": lstore.read_object( + f"{group.replace('/','')}/{e[0]}/{e[1]}", + f_evt, + idx=idx, + )[0] + for e in exprl + if e[1] + in [ + x.split("/")[-1] + for x in store.ls(f_evt, f"{group.replace('/','')}/{e[0]}/") + ] + } # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) arr_keys = [] @@ -316,6 +316,8 @@ def load_vars_to_nda(f_evt: str, group: str, exprl: list) -> dict: if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): for key in arr_keys: var[key] = var[key][:, None] + + 
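# [editor's note: illustrative sketch, not part of the patch] The tier-qualified
# names handled above ("hit.x", "dsp.x", "evt.x") are resolved with a plain
# eval() after the dots are rewritten to underscores, the same naming used by
# load_vars_to_nda for its returned keys. A minimal standalone example with a
# made-up field value:
import numpy as np

qry = "hit.cuspEmax_ctc_cal > 25"
var = {"hit_cuspEmax_ctc_cal": np.array([12.0, 40.5, np.nan, 2600.0])}
mask = eval(qry.replace("hit.", "hit_"), var)  # -> array([False, True, False, True])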
log.debug(f"Found parameters {var.keys()}") return var @@ -323,16 +325,13 @@ def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - sorter: str, - op: str = None, + sorter: list, var_ph: dict = None, defv=np.nan, ) -> dict: @@ -345,39 +344,53 @@ def evaluate_to_first( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) + if limarr.dtype != bool: + limarr = limarr.astype(bool) + # append to out according to mode == first if ch == chns[0]: outt[:] = np.inf # find if sorter is in hit or dsp - if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: - t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] - elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: - t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] - else: - raise ValueError(f"Couldn't find sorter {sorter}") + t0 = store.load_nda( + f_hit if "hit" == sorter[0] else f_dsp, + [sorter[1]], + f"{ch}/{sorter[0]}/", + idx_ch, + )[sorter[1]] out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -390,16 +403,13 @@ def evaluate_to_last( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - sorter: str, - op: str = None, + sorter: list, var_ph: dict = None, defv=np.nan, ) -> dict: @@ -411,38 +421,49 @@ def evaluate_to_last( for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, - ) + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) - + if limarr.dtype != bool: + limarr = limarr.astype(bool) # append to out according to mode == last # find if sorter is in hit or dsp - if sorter in [e.split("/")[-1] for e in store.ls(f_dsp, ch + dsp_group)]: - t0 = store.load_nda(f_dsp, [sorter], ch + dsp_group, idx_ch)[sorter] - elif sorter in [e.split("/")[-1] for e in store.ls(f_hit, ch + hit_group)]: - t0 = store.load_nda(f_hit, [sorter], ch + hit_group, idx_ch)[sorter] - else: - raise ValueError(f"Couldn't find sorter {sorter}") + t0 = store.load_nda( + f_hit if "hit" == sorter[0] else f_dsp, + [sorter[1]], + f"{ch}/{sorter[0]}/", + idx_ch, + )[sorter[1]] out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -455,15 +476,12 @@ def evaluate_to_tot( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - op: str = None, var_ph: dict = None, defv=np.nan, ) -> dict: @@ -474,32 +492,45 @@ def evaluate_to_tot( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) # append to out according to mode == tot if res.dtype == bool: res = res.astype(int) + if limarr.dtype != bool: + limarr = limarr.astype(bool) out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) return {"values": out} @@ -509,12 +540,11 @@ def evaluate_to_any( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, var_ph: dict = None, defv=np.nan, @@ -526,24 +556,46 @@ def evaluate_to_any( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # evaluate expression - res = eval(expr, var) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true + else: + limarr = np.ones(len(res)).astype(bool) # append to out according to mode == any if res.dtype != bool: res = res.astype(bool) - out[idx_ch] = out[idx_ch] | res + if limarr.dtype != bool: + limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) return {"values": out} @@ -552,12 +604,11 @@ def evaluate_to_all( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, var_ph: dict = None, defv=np.nan, @@ -569,24 +620,45 @@ def evaluate_to_all( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + 
var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # evaluate expression - res = eval(expr, var) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # if no condition, it must be true + else: + limarr = np.ones(len(res)).astype(bool) # append to out according to mode == all if res.dtype != bool: res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res + if limarr.dtype != bool: + limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr return {"values": out} @@ -595,9 +667,7 @@ def evaluate_at_channel( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, @@ -611,19 +681,24 @@ def evaluate_at_channel( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) @@ -634,9 +709,7 @@ def evaluate_at_channel_vov( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, expr: str, exprl: list, ch_comp: VectorOfVectors, @@ -651,21 +724,24 @@ def evaluate_at_channel_vov( # get index list for this channel to be loaded idx_ch = idx[ids == ch] - # find fields in either dsp, hit - var = ( - find_parameters( - f_hit, f_dsp, f"ch{ch}", idx_ch, exprl, dsp_group, hit_group + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + # if it is not a nparray it could be a single value + # expand accordingly + if not 
isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) # see in which events the current channel is present mask = (out == ch).any(axis=1) @@ -683,15 +759,12 @@ def evaluate_to_vector( idx: np.ndarray, ids: np.ndarray, f_hit: str, - hit_group: str, f_dsp: str, - dsp_group: str, chns: list, expr: str, exprl: list, + qry: str | np.ndarray, nrows: int, - mode_lim: int | float, - op: str = None, var_ph: dict = None, ) -> dict: """ @@ -710,29 +783,41 @@ def evaluate_to_vector( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - # find fields in either dsp, hit - var = ( - find_parameters(f_hit, f_dsp, ch, idx_ch, exprl, dsp_group, hit_group) - | var_ph - ) - - # evaluate expression - res = eval(expr, var) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get unification condition if present in mode - if op is not None: - limarr = eval( - "".join(["res", op, "lim"]), - {"res": res, "lim": mode_lim}, + if "tcm.array_id" == expr: + res = np.full(len(out), int(ch[2:]), dtype=int) + else: + # find fields in either dsp, hit + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(len(out), res, dtype=type(res)) + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true else: limarr = np.ones(len(res)).astype(bool) + if limarr.dtype != bool: + limarr = limarr.astype(bool) # append to out according to mode == vov out[:, i][limarr] = res[limarr] out_chs[:, i][limarr] = int(ch[2:]) @@ -763,8 +848,6 @@ def build_evt( wo_mode: str = "write_safe", group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", - dsp_group: str = "/dsp/", - hit_group: str = "/hit/", ) -> None: """ Transform data from the hit and dsp levels which a channel sorted @@ -845,10 +928,6 @@ def build_evt( lh5 root group name tcm_group lh5 root group in tcm file - dsp_group - lh5 root group in dsp file - hit_group - lh5 root group in hit file """ lstore = store.LH5Store() @@ -907,15 +986,15 @@ def build_evt( log.debug("Processing field" + k) # if mode not defined in operation, it can only be an operation on the evt level. 
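# [editor's note: illustrative sketch, not part of the patch] An operation
# without a mode key is evaluated purely on evt fields produced by earlier
# operations. A minimal config dict exercising this branch (field names
# borrowed from the test configs below; to be passed to build_evt as
# evt_config):
conf = {
    "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]},
    "operations": {
        "multiplicity": {
            "channels": "geds_on",
            "aggregation_mode": "sum",
            "expression": "hit.cuspEmax_ctc_cal > a",
            "parameters": {"a": 25},
            "initial": 0,
        },
        # evt-level operation: no "aggregation_mode", evt fields only
        "multiplicity_squared": {"expression": "evt.multiplicity*evt.multiplicity"},
    },
}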
- if "mode" not in v.keys(): - exprl = re.findall(r"[a-zA-Z_$][\w$]*", v["expression"]) + if "aggregation_mode" not in v.keys(): + exprl = re.findall(r"(evt).([a-zA-Z_$][\w$]*)", v["expression"]) var = {} if os.path.exists(f_evt): - var = load_vars_to_nda(f_evt, group, exprl) + var = load_vars_to_nda(f_evt, "", exprl) if "parameters" in v.keys(): var = var | v["parameters"] - res = eval(v["expression"], var) + res = eval(v["expression"].replace("evt.", "evt_"), var) # now check what dimension we have after the evaluation if len(res.shape) == 1: @@ -950,9 +1029,11 @@ def build_evt( itertools.chain.from_iterable([chns[e] for e in v["channels"]]) ) - pars, defaultv = None, np.nan + pars, qry, defaultv = None, None, np.nan if "parameters" in v.keys(): pars = v["parameters"] + if "query" in v.keys(): + qry = v["query"] if "initial" in v.keys() and not v["initial"] == "np.nan": defaultv = v["initial"] @@ -962,13 +1043,12 @@ def build_evt( f_hit, f_dsp, chns_e, - v["mode"], + v["aggregation_mode"], v["expression"], nrows, group, - dsp_group, - hit_group, pars, + qry, defaultv, ) @@ -984,16 +1064,16 @@ def build_evt( # if get_ch flag is true and exists and result dic contains channels entry # write also channels information - if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): - obj = result["channels"] - if isinstance(obj, np.ndarray): - obj = Array(result["channels"]) - lstore.write_object( - obj=obj, - name=group + k + "_id", - lh5_file=f_evt, - wo_mode=wo_mode, - ) + # if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): + # obj = result["channels"] + # if isinstance(obj, np.ndarray): + # obj = Array(result["channels"]) + # lstore.write_object( + # obj=obj, + # name=group + k + "_id", + # lh5_file=f_evt, + # wo_mode=wo_mode, + # ) log.info("Done") diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index aa0b68456..c573c89fb 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -5,62 +5,67 @@ "operations": { "multiplicity": { "channels": "geds_on", - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, "initial": 0 }, "energy": { "channels": "geds_on", - "mode": ["first>25", "tp_0_est"], - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_id": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "initial": 0 + }, "energy_any_above1MeV": { "channels": "geds_on", - "mode": "any", - "get_ch": true, - "expression": "cuspEmax_ctc_cal>1000", + "aggregation_mode": "any", + "expression": "hit.cuspEmax_ctc_cal>1000", "initial": false }, "energy_all_above1MeV": { "channels": "geds_on", - "mode": "all", - "get_ch": true, - "expression": "cuspEmax_ctc_cal>1000", + "aggregation_mode": "all", + "expression": "hit.cuspEmax_ctc_cal>1000", "initial": false }, "energy_aux": { "channels": "geds_on", - "mode": ["last>25", "tp_0_est"], - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, "energy_sum": { "channels": "geds_on", - "mode": "tot>25", - "get_ch": true, - "expression": 
"cuspEmax_ctc_cal", + "aggregation_mode": "sum", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": 0.0 }, "is_usable_aoe": { "channels": "geds_on", - "mode": "energy_id", + "aggregation_mode": "evt.energy_id", "expression": "True", "initial": false }, "aoe": { "channels": "geds_on", - "mode": "energy_id", - "expression": "AoE_Classifier", + "aggregation_mode": "evt.energy_id", + "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { "channels": "geds_on", - "mode": "energy_id", - "expression": "~(AoE_Double_Sided_Cut)", + "aggregation_mode": "evt.energy_id", + "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false } } diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 8f084034a..4810b91e0 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -5,50 +5,57 @@ }, "operations": { "energy_first": { - "channels": ["geds_on"], - "mode": ["first>25", "tp_0_est"], - "get_ch": true, - "expression": "cuspEmax_ctc_cal", + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, + "energy_first_id": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "initial": 0 + }, "t0": { "channels": ["geds_on"], - "mode": "energy_first_id", - "expression": "tp_0_est", + "aggregation_mode": "evt.energy_first_id", + "expression": "dsp.tp_0_est", "initial": 0.0 }, "lar_energy": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority(0.5,evt.t0,48000,1000,5000)" }, "is_lar_rejected": { - "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + "expression": "(evt.lar_energy >4) | (evt.lar_multiplicity > 4) " }, "lar_classifier": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" }, "lar_energy_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_time_shift": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)" } } } diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 436332409..06918a421 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -6,48 
+6,54 @@ "operations": { "energy": { "channels": "geds_on", - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal" + }, + "energy_id": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id" }, "t0": { "channels": ["geds_on"], - "mode": "energy_id", - "expression": "tp_0_est", + "aggregation_mode": "evt.energy_id", + "expression": "dsp.tp_0_est", "initial": 0.0 }, "lar_energy": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority(0.5,evt.t0,48000,1000,5000)" }, "is_lar_rejected": { - "expression": "(lar_energy >4) | (lar_multiplicity > 4) " + "expression": "(evt.lar_energy >4) | (evt.lar_multiplicity > 4) " }, "lar_classifier": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_etc(0.5,t0,48000,100,6000,80,1)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" }, "lar_energy_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_energy_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity_dplms": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_majority_dplms(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_majority_dplms(0.5,evt.t0,48000,1000,5000)" }, "lar_time_shift": { "channels": "spms_on", - "mode": "func", - "expression": ".modules.spm.get_time_shift(0.5,t0,48000,1000,5000)" + "aggregation_mode": "function", + "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)" } } } diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json new file mode 100644 index 000000000..abbaa8da4 --- /dev/null +++ b/tests/evt/configs/query-test-evt-config.json @@ -0,0 +1,88 @@ +{ + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + }, + "operations":{ + "multiplicity": { + "channels": "geds_on", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 + }, + "test_sum": { + "channels": "geds_on", + "aggregation_mode": "sum", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first2": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_last": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_last2": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_any": { + "channels": "geds_on", + "aggregation_mode": "any", + "query":"evt.multiplicity == 1", + 
"expression": "True", + "initial": false + }, + "test_any2": { + "channels": "geds_on", + "aggregation_mode": "any", + "query":"hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_all": { + "channels": "geds_on", + "aggregation_mode": "all", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_all2": { + "channels": "geds_on", + "aggregation_mode": "all", + "query":"hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_vov": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query":"evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_vov2": { + "channels": "geds_on", + "aggregation_mode": "vov", + "expression": "True", + "initial": false + } + } +} \ No newline at end of file diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index d1bfc4120..6f057d18c 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -5,29 +5,35 @@ "operations": { "energy": { "channels": "geds_on", - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal" + }, + "energy_id": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id" }, "aoe": { - "mode": "energy_id", - "expression": "AoE_Classifier" + "aggregation_mode": "evt.energy_id", + "expression": "hit.AoE_Classifier" }, "multiplicity": { "channels": "geds_on", - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, "initial": 0 }, "energy_times_aoe": { - "expression": "energy*aoe" + "expression": "evt.energy*evt.aoe" }, "energy_times_multiplicity": { - "expression": "energy*multiplicity" + "expression": "evt.energy*evt.multiplicity" }, "multiplicity_squared": { - "expression": "multiplicity*multiplicity" + "expression": "evt.multiplicity*evt.multiplicity" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 128833e5b..146fe5150 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -26,14 +26,10 @@ def test_basics(lgnd_test_data, tmptestdir): wo_mode="o", group="/evt/", tcm_group="hardware_tcm_1", - dsp_group="/dsp/", - hit_group="/hit/", ) assert os.path.exists(outfile) - assert ( - len(ls(outfile, "/evt/")) == 11 - ) # 7 operations of which 2 are requesting channel field + assert len(ls(outfile, "/evt/")) == 10 nda = load_nda( outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" ) @@ -154,7 +150,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(RuntimeError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) - with pytest.raises(NameError): + with pytest.raises(RuntimeError): build_evt(f_tcm, f_hit, f_dsp, outfile, f_config, meta_path) with pytest.raises(TypeError): @@ -168,57 +164,39 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = { - "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, - "operations": {}, - } - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - assert not os.path.exists(outfile) - conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, "operations": { - "energy": { + 
"foo": { "channels": "geds_on", - "mode": ["first>pineapple", "tp_0_est"], - "get_ch": True, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan", + "aggregation_mode": "banana", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": {"a": 25}, + "initial": 0, } }, } with pytest.raises(ValueError): build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = { - "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, - "operations": { - "energy": { - "channels": "geds_on", - "mode": ["first>25", "tp_0_est"], - "get_ch": True, - "expression": "cuspEmax_ctc_cal$cuspEmax_ctc_cal", - "initial": "np.nan", - } - }, - } - with pytest.raises(SyntaxError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) - conf = { - "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, - "operations": { - "energy": { - "channels": "geds_on", - "mode": ["first>25", "coconut"], - "get_ch": True, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan", - } - }, - } - with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) +def test_query(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + meta_path=None, + evt_config=f"{config_dir}/query-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + assert len(ls(outfile, "/evt/")) == 12 def test_skimming(lgnd_test_data, tmptestdir): @@ -244,3 +222,6 @@ def test_skimming(lgnd_test_data, tmptestdir): skim_evt(outfile, "multiplicity == 3", None, None, "o") assert ac == len(lstore.read_object("/evt/energy", outfile)[0].to_aoesa().nda) + + with pytest.raises(ValueError): + skim_evt(outfile, "multiplicity == 3", None, None, "bla") From 5821aaf5095ee7047497a4ebfa92a5524d1cc246 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 28 Nov 2023 18:04:00 +0100 Subject: [PATCH 107/191] Moved channel obtaining by meta data to its own module --- src/pygama/evt/build_evt.py | 68 ++++--- src/pygama/evt/modules/legend_meta.py | 27 +++ tests/evt/configs/module-test-evt-config.json | 2 +- tests/evt/configs/query-test-evt-config.json | 170 +++++++++--------- tests/evt/test_build_evt.py | 21 +-- 5 files changed, 151 insertions(+), 137 deletions(-) create mode 100644 src/pygama/evt/modules/legend_meta.py diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index cc1e237e5..0288015a0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -108,8 +108,13 @@ def evaluate_expression( if mode == "function": # evaluate expression func, params = expr.split("(") + params = ( + params.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_") + ) params = [f_hit, f_dsp, f_tcm, chns] + [ - num_and_pars(e.replace(".", "_"), var_ph) for e in params[:-1].split(",") + num_and_pars(e, var_ph) for e in params[:-1].split(",") ] # load function dynamically @@ -844,7 +849,6 @@ def build_evt( f_hit: str, f_evt: str, evt_config: str | dict, - meta_path: str = None, wo_mode: str = "write_safe", group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", @@ -951,28 +955,31 @@ def build_evt( chns = {} for k, v in 
tbl_cfg["channels"].items(): - if isinstance(v, str): - # only import legend meta data when needed. - # LEGEND collaborators can use the meta keyword - # Why for users w/o access to the LEGEND meta data this is still working - if "meta" in v: - lm = import_module("legendmeta") - lmeta = lm.LegendMetadata(path=meta_path) - chmap = lmeta.channelmap(re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)) - m, sys, usa = v.split("_", 2) - tmp = [ - f"ch{e}" - for e in chmap.map("daq.rawid") - if chmap.map("daq.rawid")[e]["system"] == sys - ] - chns[k] = [ - e - for e in tmp - if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] - == usa - ] - else: - chns[k] = [v] + if isinstance(v, dict): + # it is a meta module. module_name must exist + if "module" not in v.keys(): + raise ValueError( + "Need module_name to load channel via a meta data module" + ) + + attr = {} + # the time_key argument is set to the time key of the DSP file + # in case it is not provided by the config + if "time_key" not in v.keys(): + attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0) + + # if "None" do None + elif "None" == v["time_key"]: + attr["time_key"] = None + + # load module + p, m = v["module"].rsplit(".", 1) + met = getattr(import_module(p, package=__package__), m) + chns[k] = met(v | attr) + + elif isinstance(v, str): + chns[k] = [v] + elif isinstance(v, list): chns[k] = [e for e in v] @@ -1062,19 +1069,6 @@ def build_evt( wo_mode=wo_mode, ) - # if get_ch flag is true and exists and result dic contains channels entry - # write also channels information - # if "get_ch" in v.keys() and v["get_ch"] and "channels" in result.keys(): - # obj = result["channels"] - # if isinstance(obj, np.ndarray): - # obj = Array(result["channels"]) - # lstore.write_object( - # obj=obj, - # name=group + k + "_id", - # lh5_file=f_evt, - # wo_mode=wo_mode, - # ) - log.info("Done") diff --git a/src/pygama/evt/modules/legend_meta.py b/src/pygama/evt/modules/legend_meta.py new file mode 100644 index 000000000..d188c2a14 --- /dev/null +++ b/src/pygama/evt/modules/legend_meta.py @@ -0,0 +1,27 @@ +""" +Module for importing channel lists from LEGEND meta data +""" +from importlib import import_module + + +def legend_meta(params: dict) -> list: + # only import legend meta data when needed. 
+ # LEGEND collaborators can use the meta keyword + # While for users w/o access to the LEGEND meta data this is still working + lm = import_module("legendmeta") + lmeta = lm.LegendMetadata(path=params["meta_path"]) + chmap = lmeta.channelmap(params["time_key"]) + tmp = [ + f"ch{e}" + for e in chmap.map("daq.rawid") + if chmap.map("daq.rawid")[e]["system"] == params["system"] + ] + if "usability" not in params.keys(): + return tmp + else: + return [ + e + for e in tmp + if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] + == params["usability"] + ] diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 4810b91e0..d4d6c1148 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -27,7 +27,7 @@ "lar_energy": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + "expression": "pygama.evt.modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" }, "lar_multiplicity": { "channels": "spms_on", diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index abbaa8da4..0bf7fe4f9 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -1,88 +1,88 @@ { - "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + }, + "operations": { + "multiplicity": { + "channels": "geds_on", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": { "a": 25 }, + "initial": 0 }, - "operations":{ - "multiplicity": { - "channels": "geds_on", - "aggregation_mode": "sum", - "expression": "hit.cuspEmax_ctc_cal > a", - "parameters": { "a": 25 }, - "initial": 0 - }, - "test_sum": { - "channels": "geds_on", - "aggregation_mode": "sum", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_first": { - "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_first2": { - "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], - "expression": "True", - "initial": false - }, - "test_last": { - "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_last2": { - "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], - "expression": "True", - "initial": false - }, - "test_any": { - "channels": "geds_on", - "aggregation_mode": "any", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_any2": { - "channels": "geds_on", - "aggregation_mode": "any", - "query":"hit.cuspEmax_ctc_cal >25", - "expression": "True", - "initial": false - }, - "test_all": { - "channels": "geds_on", - "aggregation_mode": "all", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_all2": { - "channels": "geds_on", - "aggregation_mode": "all", - "query":"hit.cuspEmax_ctc_cal >25", - "expression": "True", - "initial": false - }, - "test_vov": { - "channels": "geds_on", - "aggregation_mode": "vov", - "query":"evt.multiplicity == 1", - "expression": "True", - "initial": false - }, - "test_vov2": { - "channels": "geds_on", - "aggregation_mode": "vov", - "expression": "True", - "initial": false - } + 
"test_sum": { + "channels": "geds_on", + "aggregation_mode": "sum", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_first2": { + "channels": "geds_on", + "aggregation_mode": ["first", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_last": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_last2": { + "channels": "geds_on", + "aggregation_mode": ["last", "dsp.tp_0_est"], + "expression": "True", + "initial": false + }, + "test_any": { + "channels": "geds_on", + "aggregation_mode": "any", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_any2": { + "channels": "geds_on", + "aggregation_mode": "any", + "query": "hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_all": { + "channels": "geds_on", + "aggregation_mode": "all", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_all2": { + "channels": "geds_on", + "aggregation_mode": "all", + "query": "hit.cuspEmax_ctc_cal >25", + "expression": "True", + "initial": false + }, + "test_vov": { + "channels": "geds_on", + "aggregation_mode": "vov", + "query": "evt.multiplicity == 1", + "expression": "True", + "initial": false + }, + "test_vov2": { + "channels": "geds_on", + "aggregation_mode": "vov", + "expression": "True", + "initial": false } -} \ No newline at end of file + } +} diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 146fe5150..e7cae4e4d 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -21,7 +21,6 @@ def test_basics(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", group="/evt/", @@ -57,7 +56,6 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/module-test-evt-config.json", wo_mode="o", group="/evt/", @@ -85,7 +83,6 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", wo_mode="o", group="/evt/", @@ -112,7 +109,6 @@ def test_vov(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/vov-test-evt-config.json", wo_mode="o", group="/evt/", @@ -144,25 +140,24 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_tcm = lgnd_test_data.get_path(tcm_path) f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - meta_path = None f_config = f"{config_dir}/basic-evt-config.json" with pytest.raises(RuntimeError): - build_evt(f_dsp, f_tcm, f_hit, outfile, f_config, meta_path) + build_evt(f_dsp, 
f_tcm, f_hit, outfile, f_config) with pytest.raises(RuntimeError): - build_evt(f_tcm, f_hit, f_dsp, outfile, f_config, meta_path) + build_evt(f_tcm, f_hit, f_dsp, outfile, f_config) with pytest.raises(TypeError): - build_evt(f_tcm, f_dsp, f_hit, outfile, None, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, None) conf = {"operations": {}} with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) conf = {"channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}} with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, @@ -177,7 +172,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): }, } with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, conf) def test_query(lgnd_test_data, tmptestdir): @@ -190,7 +185,6 @@ def test_query(lgnd_test_data, tmptestdir): f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), f_evt=outfile, - meta_path=None, evt_config=f"{config_dir}/query-test-evt-config.json", wo_mode="o", group="/evt/", @@ -207,9 +201,8 @@ def test_skimming(lgnd_test_data, tmptestdir): f_tcm = lgnd_test_data.get_path(tcm_path) f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - meta_path = None f_config = f"{config_dir}/vov-test-evt-config.json" - build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, meta_path) + build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) lstore = store.LH5Store() ac = lstore.read_object("/evt/multiplicity", outfile)[0].nda From 8e2bab13825f9dd17cf0223445afbb2ab68570ce Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 29 Nov 2023 19:29:39 +0100 Subject: [PATCH 108/191] many things --- src/pygama/evt/build_evt.py | 918 ++++++++++++------ src/pygama/evt/modules/legend_meta.py | 13 +- tests/evt/configs/basic-evt-config.json | 12 +- tests/evt/configs/module-test-evt-config.json | 8 +- .../module-test-t0-vov-evt-config.json | 8 +- tests/evt/configs/query-test-evt-config.json | 12 +- tests/evt/configs/vov-test-evt-config.json | 6 +- 7 files changed, 633 insertions(+), 344 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 0288015a0..249d08c2f 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1,6 +1,7 @@ """ This module implements routines to build the evt tier. """ + from __future__ import annotations import itertools @@ -41,10 +42,10 @@ def evaluate_expression( f_hit: str, f_dsp: str, chns: list, - mod: str | list, + chns_rm: list, + mode: str, expr: str, nrows: int, - group: str, para: dict = None, qry: str = None, defv=np.nan, @@ -64,39 +65,29 @@ def evaluate_expression( Path to dsp tier file chns List of channel names across which expression gets evaluated (form: "ch") + chns_rm + List of channels which get set to default value during evaluation. In function mode they are removed entirely (form: "ch") mode The mode determines how the event entry is calculated across channels. Options are: - - "first": The value of the channel in an event triggering first in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "first>10"). 
Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event.
-      - "last": The value of the channel in an event triggering last in time (according to tp_0_est) is returned. It is possible to add a condition (e.g. "last>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, nan is returned for this event.
-      - "tot": The sum of all channels across an event. It is possible to add a condition (e.g. "tot>10"). Only channels fulfilling this condition are considered in the time evaluation. If no channel fullfilles the condition, zero is returned for this event. Booleans are treated as integers 0/1.
-      - "any": Logical or between all channels. Non boolean values are True for values != 0 and False for values == 0.
-      - "all": Logical and between all channels. Non boolean values are True for values != 0 and False for values == 0.
-      - ch_field: A previously generated channel_id field (i.e. from the get_ch flag) can be given here, and the value of this specific channels is used. if ch_field is a VectorOfVectors, the channel list is ignored. If ch_field is an Array, the intersection of the passed channels list and the Array is formed. If a channel is not in the Array, the default is used.
-      - "vov": Channels are not combined, but result saved as VectorOfVectors. Use of getch is recommended. It is possible (and recommended) to add a condition (e.g. "vov>10"). Only channels fulfilling this condition are saved.
+      - "first_at:sorter": aggregates across channels by returning the expression of the channel with the smallest value of sorter.
+      - "last_at:sorter": aggregates across channels by returning the expression of the channel with the largest value of sorter.
+      - "sum": aggregates by summation.
+      - "any": aggregates by logical or.
+      - "all": aggregates by logical and.
+      - "keep_at:ch_field": aggregates according to the passed ch_field.
+      - "vectorize": channels are not combined; the result is saved as a VectorOfVectors.
    qry
-        A query that can set a condition on mode. Can be any tier (i.e. a channelxevents shaped boolean matrix for tiers below event or an events long boolean array at the evt level)
+        A query that can mask the aggregation.
    expr
-        The expression. That can be any mathematical equation/comparison. If mode == func, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used.
+        The expression. Can be any mathematical equation/comparison. If mode == function, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy"). In the expression, parameters from the hit, dsp or evt tier (from operations performed before this one! --> JSON operations order matters) or from the "parameters" field can be used.
    nrows
        Number of rows to be processed.
-    group
-        lh5 root group name
-    dsp_group
-        lh5 root group in dsp file
-    hit_group
-        lh5 root group in hit file
    para
        Dictionary of parameters defined in the "parameters" field in the configuration JSON file.
defv default value of evaluation """ - # set modus variables - mode, sorter = mod, None - if isinstance(mod, list): - mode = mod[0] - sorter = mod[1].split(".") - # find parameters in evt file or in parameters exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} @@ -113,7 +104,7 @@ def evaluate_expression( .replace("hit.", "hit_") .replace("evt.", "evt_") ) - params = [f_hit, f_dsp, f_tcm, chns] + [ + params = [f_hit, f_dsp, f_tcm, [x for x in chns if x not in chns_rm]] + [ num_and_pars(e, var_ph) for e in params[:-1].split(",") ] @@ -145,12 +136,13 @@ def evaluate_expression( # switch through modes if ( os.path.exists(f_evt) - and "evt." == mode[:4] - and mode.split(".")[-1] + and "keep_at:" == mode[:8] + and "evt." == mode[8:][:4] + and mode[8:].split(".")[-1] in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] ): lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(mode.replace(".", "/"), f_evt) + ch_comp, _ = lstore.read_object(mode[8:].replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -158,6 +150,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, ch_comp, @@ -173,7 +166,9 @@ def evaluate_expression( expr, exprl, ch_comp, + chns_rm, var_ph, + defv, ) else: raise NotImplementedError( @@ -181,13 +176,19 @@ def evaluate_expression( + " not supported (only Array and VectorOfVectors are supported)" ) - elif "first" == mode: + elif "first_at:" in mode: + sorter = tuple( + re.findall( + r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("first_at:")[-1] + )[0] + ) return evaluate_to_first( idx, ids, f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -196,13 +197,19 @@ def evaluate_expression( var_ph, defv, ) - elif "last" == mode: + elif "last_at:" in mode: + sorter = tuple( + re.findall( + r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("last_at:")[-1] + )[0] + ) return evaluate_to_last( idx, ids, f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -218,6 +225,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -225,18 +233,20 @@ def evaluate_expression( var_ph, defv, ) - elif "vov" == mode: + elif "vectorize" == mode: return evaluate_to_vector( idx, ids, f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, nrows, var_ph, + defv, ) elif "any" == mode: return evaluate_to_any( @@ -245,6 +255,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -259,6 +270,7 @@ def evaluate_expression( f_hit, f_dsp, chns, + chns_rm, expr, exprl, qry_mask, @@ -277,6 +289,23 @@ def find_parameters( idx_ch: np.ndarray, exprl: list, ) -> dict: + """ + Wraps :func:`load_vars_to_nda` to return parameters from hit and dsp tiers. + + Parameters + ---------- + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + ch + rawid in the tiers + idx_ch + index array of entries to be read from files + exprl + list of tuples (tier, field) to be found in the hit/dsp tiers + """ + # find fields in either dsp, hit var = load_vars_to_nda(f_hit, ch, exprl, idx_ch) dsp_dic = load_vars_to_nda(f_dsp, ch, exprl, idx_ch) @@ -284,22 +313,33 @@ def find_parameters( return dsp_dic | var -def load_vars_to_nda( - f_evt: str, group: str, exprl: list, idx: np.ndarray = None -) -> dict: +def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> dict: + """ + Maps parameter expressions to parameters if found in f. + Blows up VectorOfVectors to ArrayOfEqualSizedArrays. 
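+    Fields not present in f are silently skipped, so the returned dictionary may hold fewer entries than requested.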
+ + Parameters + ---------- + f + Path to a LGDO file + group + additional group in f + idx + index array of entries to be read from files + exprl + list of parameter-tuples (root_group, field) to be found in f + """ + lstore = store.LH5Store() var = { f"{e[0]}_{e[1]}": lstore.read_object( f"{group.replace('/','')}/{e[0]}/{e[1]}", - f_evt, + f, idx=idx, )[0] for e in exprl if e[1] - in [ - x.split("/")[-1] - for x in store.ls(f_evt, f"{group.replace('/','')}/{e[0]}/") - ] + in [x.split("/")[-1] for x in store.ls(f, f"{group.replace('/','')}/{e[0]}/")] } # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) @@ -326,20 +366,164 @@ def load_vars_to_nda( return var +def get_data_at_channel( + ch: str, + idx_ch: np.ndarray, + expr: str, + exprl: list, + var_ph: dict, + is_evaluated: bool, + f_hit: str, + f_dsp: str, + outsize: int, + defv, +) -> np.ndarray: + """ + Evaluates an expression and returns the result + + Parameters + ---------- + ch + rawid of channel to be evaluated + idx_ch + array of indices to be evaluated + expr + expression to be evaluated + exprl + list of parameter-tuples (root_group, field) found in the expression + var_ph + dict of additional parameters that are not channel dependent + is_evaluated + if false, the expression does not get evaluated but an array of default values is returned + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + outsize + size of the return array + defv + default value + """ + + if not is_evaluated: + res = np.full(outsize, defv, dtype=type(defv)) + elif "tcm.array_id" == expr: + res = np.full(outsize, int(ch[2:]), dtype=int) + else: + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) + res = eval( + expr.replace("dsp.", "dsp_") + .replace("hit.", "hit_") + .replace("evt.", "evt_"), + var, + ) + + # if it is not a nparray it could be a single value + # expand accordingly + if not isinstance(res, np.ndarray): + res = np.full(outsize, res, dtype=type(res)) + + return res + + +def get_mask_from_query( + qry: str | np.ndarray, + length: int, + ch: str, + idx_ch: np.ndarray, + f_hit: str, + f_dsp: str, +) -> np.ndarray: + """ + Evaluates an query expression and returns a mask accordingly + + Parameters + ---------- + qry + query expression + length + length of the return mask + ch + rawid of channel to be evaluated + idx_ch + array of indices to be evaluated + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + """ + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) + limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true + else: + limarr = np.ones(length).astype(bool) + + if limarr.dtype != bool: + limarr = limarr.astype(bool) + + return limarr + + def evaluate_to_first( idx: np.ndarray, ids: np.ndarray, f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, nrows: int, - sorter: list, + sorter: tuple, var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates across channels by returning the expression of the channel with smallest value of sorter. 
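+    Events in which no channel passes the query keep the default value defv.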
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + sorter + tuple of field in hit/dsp/evt tier to evaluate (tier,field) + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) out_chs = np.zeros(len(out), dtype=int) @@ -349,41 +533,22 @@ def evaluate_to_first( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry - - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # evaluate at channel + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == first if ch == chns[0]: @@ -410,14 +575,48 @@ def evaluate_to_last( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, nrows: int, - sorter: list, + sorter: tuple, var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates across channels by returning the expression of the channel with largest value of sorter. 
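+    The sorter field is loaded from the hit or the dsp tier, depending on its tier prefix.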
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + sorter + tuple of field in hit/dsp/evt tier to evaluate (tier,field) + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) out_chs = np.zeros(len(out), dtype=int) @@ -426,41 +625,24 @@ def evaluate_to_last( for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + # evaluate at channel + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) - if limarr.dtype != bool: - limarr = limarr.astype(bool) # append to out according to mode == last # find if sorter is in hit or dsp t0 = store.load_nda( @@ -483,6 +665,7 @@ def evaluate_to_tot( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, @@ -490,6 +673,37 @@ def evaluate_to_tot( var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by summation across channels. 
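+    Boolean expressions are cast to integers (0/1) before summing, so this mode can also count channels fulfilling a condition.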
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -497,45 +711,26 @@ def evaluate_to_tot( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == tot if res.dtype == bool: res = res.astype(int) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) return {"values": out} @@ -547,6 +742,7 @@ def evaluate_to_any( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, @@ -554,6 +750,37 @@ def evaluate_to_any( var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by logical or operation across channels. If the expression evaluates to a non boolean value it is casted to bool. 
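+    Channels failing the query do not contribute to the logical or.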
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -561,45 +788,26 @@ def evaluate_to_any( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == any if res.dtype != bool: res = res.astype(bool) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) return {"values": out} @@ -611,6 +819,7 @@ def evaluate_to_all( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, @@ -618,6 +827,37 @@ def evaluate_to_all( var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by logical and operation across channels. If the expression evaluates to a non boolean value it is casted to bool. 
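+    Note that channels listed in chns_rm still enter the logical and with their default value.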
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) @@ -625,44 +865,26 @@ def evaluate_to_all( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # append to out according to mode == all if res.dtype != bool: res = res.astype(bool) - if limarr.dtype != bool: - limarr = limarr.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr return {"values": out} @@ -674,36 +896,60 @@ def evaluate_at_channel( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, ch_comp: Array, var_ph: dict = None, defv=np.nan, ) -> dict: + """ + aggregates by evaluating the expression at a given channel + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + ch_comp + array of rawids at which the expression is evaluated + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + out = np.full(len(ch_comp), defv, dtype=type(defv)) for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = 
np.full(len(out), res, dtype=type(res)) + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) @@ -718,43 +964,68 @@ def evaluate_at_channel_vov( expr: str, exprl: list, ch_comp: VectorOfVectors, + chns_rm: list, var_ph: dict = None, + defv=np.nan, ) -> dict: + """ + same as :func:`evaluate_at_channel` but evaluates expression at non flat channels VectorOfVectors. + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + ch_comp + array of rawids at which the expression is evaluated + chns_rm + list of channels to be skipped from evaluation and set to default value + var_ph + dictionary of evt and additional parameters and their values + defv + default value + """ + # blow up vov to aoesa out = ch_comp.to_aoesa().nda chns = np.unique(out[~np.isnan(out)]).astype(int) - + type_name = None for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == ch] - - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, f"ch{ch}", idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) + res = get_data_at_channel( + f"ch{ch}", + idx_ch, + expr, + exprl, + var_ph, + f"ch{ch}" not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) # see in which events the current channel is present mask = (out == ch).any(axis=1) out[out == ch] = res[mask] + if ch == chns[0]: + type_name = res.dtype + # ok now implode the table again out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(res.dtype), + flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type_name), cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) return {"values": out, "channels": ch_comp} @@ -766,18 +1037,46 @@ def evaluate_to_vector( f_hit: str, f_dsp: str, chns: list, + chns_rm: list, expr: str, exprl: list, qry: str | np.ndarray, nrows: int, var_ph: dict = None, + defv=np.nan, ) -> dict: """ - Allows the evaluation as a vector of vectors. - Returns a dictionary of values: VoV of requested values - and channels: VoV of same dimensions with requested channel_id + Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. 
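+    Internally the result is first built as a rectangular array padded with NaN and then compressed into a VectorOfVectors by dropping the NaN entries.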
+ + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output VectorOfVectors + ch_comp + array of rawids at which the expression is evaluated + var_ph + dictionary of evt and additional parameters and their values + defv + default value """ - # raise NotImplementedError # define dimension of output array out = np.full((nrows, len(chns)), np.nan) @@ -788,41 +1087,22 @@ def evaluate_to_vector( # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - if "tcm.array_id" == expr: - res = np.full(len(out), int(ch[2:]), dtype=int) - else: - # find fields in either dsp, hit - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph - - # evaluate expression - res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), - var, - ) - - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(len(out), res, dtype=type(res)) - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry + res = get_data_at_channel( + ch, + idx_ch, + expr, + exprl, + var_ph, + ch not in chns_rm, + f_hit, + f_dsp, + len(out), + defv, + ) - # if no condition, it must be true - else: - limarr = np.ones(len(res)).astype(bool) + # get mask from query + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - if limarr.dtype != bool: - limarr = limarr.astype(bool) # append to out according to mode == vov out[:, i][limarr] = res[limarr] out_chs[:, i][limarr] = int(ch[2:]) @@ -869,60 +1149,54 @@ def build_evt( f_evt name of the output file evt_config - name of JSON file or dict defining evt fields. Channel lists can be defined by the user or by using the keyword "meta" followed by the system (geds/spms) and the usability (on,no_psd,ac,off) separated by underscores (e.g. "meta_geds_on") in the "channels" dictionary. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "mode" defines how the channels should be combined (see evaluate_expression). For first/last modes a "get_ch" flag can be defined, if true an additional field with the sufix "_id" is returned containing the rawid of the respective value in the field without the suffix. "expression" defnies the mathematical/special function to apply (see evaluate_expression), "parameters" defines any other parameter used in expression. For example: + name of JSON file or dict defining evt fields. Channel lists can be defined by importing a meta module. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "aggregation_mode" defines how the channels should be combined (see evaluate_expression). 
"expression" defnies the mathematical/special function to apply (see evaluate_expression), + "query" defines an expression to mask the aggregation. + "parameters" defines any other parameter used in expression. For example: .. code-block::json { "channels": { - "geds_on": "meta_geds_on", - "geds_no_psd": "meta_geds_no_psd", - "geds_ac": "meta_geds_ac", - "spms_on": "meta_spms_on", - "pulser": "PULS01", - "baseline": "BSLN01", - "muon": "MUON01", - "ts_master":"S060" + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "muon": "ch1027202", }, "operations": { "energy":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "first>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal", - "initial": "np.nan" - }, - "energy_on":{ - "channels": ["geds_on"], - "mode": "vov>25", - "get_ch": true, - "expression": "cuspEmax_ctc_cal" + "channels": "geds_on", + "aggregation_mode": "vectorize", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal" }, - "aoe":{ - "channels": ["geds_on"], - "mode": "energy_id", - "expression": "AoE_Classifier", - "initial": "np.nan" + "energy_id":{ + "channels": "geds_on", + "aggregation_mode": "vectorize", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id" }, - "is_muon_tagged":{ + "is_muon_rejected":{ "channels": "muon", - "mode": "any", - "expression": "wf_max>a", + "aggregation_mode": "any", + "expression": "dsp.wf_max>a", "parameters": {"a":15100}, "initial": false }, "multiplicity":{ "channels": ["geds_on","geds_no_psd","geds_ac"], - "mode": "tot", - "expression": "cuspEmax_ctc_cal > a", + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", "parameters": {"a":25}, "initial": 0 }, + "t0":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "dsp.tp_0_est" + }, "lar_energy":{ "channels": "spms_on", - "mode": "func", - "expression": "modules.spm.get_energy(0.5,t0,48000,1000,5000)" - } + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + }, } } @@ -1035,6 +1309,16 @@ def build_evt( chns_e = list( itertools.chain.from_iterable([chns[e] for e in v["channels"]]) ) + chns_rm = [] + if "exclude_channels" in v.keys(): + if isinstance(v["exclude_channels"], str): + chns_rm = chns[v["exclude_channels"]] + elif isinstance(v["exclude_channels"], list): + chns_rm = list( + itertools.chain.from_iterable( + [chns[e] for e in v["exclude_channels"]] + ) + ) pars, qry, defaultv = None, None, np.nan if "parameters" in v.keys(): @@ -1050,10 +1334,10 @@ def build_evt( f_hit, f_dsp, chns_e, + chns_rm, v["aggregation_mode"], v["expression"], nrows, - group, pars, qry, defaultv, diff --git a/src/pygama/evt/modules/legend_meta.py b/src/pygama/evt/modules/legend_meta.py index d188c2a14..8e98f6385 100644 --- a/src/pygama/evt/modules/legend_meta.py +++ b/src/pygama/evt/modules/legend_meta.py @@ -16,12 +16,17 @@ def legend_meta(params: dict) -> list: for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]["system"] == params["system"] ] - if "usability" not in params.keys(): - return tmp - else: - return [ + if "usability" in params.keys(): + tmp = [ e for e in tmp if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] == params["usability"] ] + if "geds" == params["system"] and "type" in params.keys(): + tmp = [ + e + for e in tmp + if chmap.map("daq.rawid")[int(e[2:])]["type"] == params["type"] + ] + return tmp diff --git a/tests/evt/configs/basic-evt-config.json 
b/tests/evt/configs/basic-evt-config.json index c573c89fb..1fd0527cb 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -12,14 +12,14 @@ }, "energy": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id", "initial": 0 @@ -38,7 +38,7 @@ }, "energy_aux": { "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], + "aggregation_mode": "last_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" @@ -52,19 +52,19 @@ }, "is_usable_aoe": { "channels": "geds_on", - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "True", "initial": false }, "aoe": { "channels": "geds_on", - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { "channels": "geds_on", - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false } diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index d4d6c1148..07262cc3d 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -6,21 +6,21 @@ "operations": { "energy_first": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal", "initial": "np.nan" }, "energy_first_id": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id", "initial": 0 }, "t0": { - "channels": ["geds_on"], - "aggregation_mode": "evt.energy_first_id", + "channels": "geds_on", + "aggregation_mode": "keep_at:evt.energy_first_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 06918a421..61782b01e 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -6,19 +6,19 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, "t0": { - "channels": ["geds_on"], - "aggregation_mode": "evt.energy_id", + "channels": "geds_on", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index 0bf7fe4f9..7998a496c 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -19,27 +19,27 @@ }, "test_first": { "channels": "geds_on", - 
"aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_first2": { "channels": "geds_on", - "aggregation_mode": ["first", "dsp.tp_0_est"], + "aggregation_mode": "first_at:dsp.tp_0_est", "expression": "True", "initial": false }, "test_last": { "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], + "aggregation_mode": "last_at:dsp.tp_0_est", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_last2": { "channels": "geds_on", - "aggregation_mode": ["last", "dsp.tp_0_est"], + "aggregation_mode": "last_at:dsp.tp_0_est", "expression": "True", "initial": false }, @@ -73,14 +73,14 @@ }, "test_vov": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_vov2": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "expression": "True", "initial": false } diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 6f057d18c..f5b3679bb 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -5,18 +5,18 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vov", + "aggregation_mode": "vectorize", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, "aoe": { - "aggregation_mode": "evt.energy_id", + "aggregation_mode": "keep_at:evt.energy_id", "expression": "hit.AoE_Classifier" }, "multiplicity": { From d069968897cf433d04cba5f24481a1c25c42f770 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 1 Dec 2023 23:40:37 +0100 Subject: [PATCH 109/191] add sorter to vector evaluation --- src/pygama/evt/build_evt.py | 129 ++++++++++++++++++++++++++++++++---- tests/evt/test_build_evt.py | 49 ++++++++++++++ 2 files changed, 164 insertions(+), 14 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 249d08c2f..5cb25c711 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -49,6 +49,7 @@ def evaluate_expression( para: dict = None, qry: str = None, defv=np.nan, + sorter: str = None, ) -> dict: """ Evaluates the expression defined by the user across all channels according to the mode @@ -86,6 +87,8 @@ def evaluate_expression( Dictionary of parameters defined in the "parameters" field in the configuration JSON file. defv default value of evaluation + sorter + can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`) """ # find parameters in evt file or in parameters @@ -247,6 +250,7 @@ def evaluate_expression( nrows, var_ph, defv, + sorter, ) elif "any" == mode: return evaluate_to_any( @@ -410,7 +414,10 @@ def get_data_at_channel( elif "tcm.array_id" == expr: res = np.full(outsize, int(ch[2:]), dtype=int) else: - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) | var_ph + var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) + + if var_ph is not None: + var = var | var_ph # evaluate expression # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) @@ -1031,7 +1038,7 @@ def evaluate_at_channel_vov( return {"values": out, "channels": ch_comp} -def evaluate_to_vector( +def evaluate_to_aoesa( idx: np.ndarray, ids: np.ndarray, f_hit: str, @@ -1044,9 +1051,10 @@ def evaluate_to_vector( nrows: int, var_ph: dict = None, defv=np.nan, -) -> dict: + missv=np.nan, +) -> np.ndarray: """ - Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + Aggregates by returning a ArrayOfEqualSizedArrays of evaluated expressions of channels that fulfill a query expression. Parameters ---------- @@ -1076,11 +1084,13 @@ def evaluate_to_vector( dictionary of evt and additional parameters and their values defv default value + missv + missing value + sorter + sorts the entries in the vector according to sorter expression """ - # define dimension of output array - out = np.full((nrows, len(chns)), np.nan) - out_chs = np.full((nrows, len(chns)), np.nan) + out = np.full((nrows, len(chns)), missv) i = 0 for ch in chns: @@ -1105,22 +1115,110 @@ def evaluate_to_vector( # append to out according to mode == vov out[:, i][limarr] = res[limarr] - out_chs[:, i][limarr] = int(ch[2:]) i += 1 + return out + + +def evaluate_to_vector( + idx: np.ndarray, + ids: np.ndarray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | np.ndarray, + nrows: int, + var_ph: dict = None, + defv=np.nan, + sorter: str = None, +) -> dict: + """ + Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + + Parameters + ---------- + idx + tcm index array + ids + tcm id array + f_hit + Path to hit tier file + f_dsp + Path to dsp tier file + chns + list of channels to be aggregated + chns_rm + list of channels to be skipped from evaluation and set to default value + expr + expression string to be evaluated + exprl + list of dsp/hit/evt parameter tuples in expression (tier,field) + qry + query expression to mask aggregation + nrows + length of output VectorOfVectors + ch_comp + array of rawids at which the expression is evaluated + var_ph + dictionary of evt and additional parameters and their values + defv + default value + sorter + sorts the entries in the vector according to sorter expression. 
ascend_by: results in a vector ordered ascending, descend_by: sorts descending
+    """
+    out = evaluate_to_aoesa(
+        idx,
+        ids,
+        f_hit,
+        f_dsp,
+        chns,
+        chns_rm,
+        expr,
+        exprl,
+        qry,
+        nrows,
+        var_ph,
+        defv,
+        np.nan,
+    )
+
+    # if a sorter is given, sort accordingly
+    if sorter is not None:
+        md, fld = sorter.split(":")
+        s_val = evaluate_to_aoesa(
+            idx,
+            ids,
+            f_hit,
+            f_dsp,
+            chns,
+            chns_rm,
+            fld,
+            [tuple(fld.split("."))],
+            None,
+            nrows,
+        )
+        if "ascend_by" == md:
+            out = out[np.arange(len(out))[:, None], np.argsort(s_val)]
+
+        elif "descend_by" == md:
+            out = out[np.arange(len(out))[:, None], np.argsort(-s_val)]
+        else:
+            raise ValueError(
+                "sorter values can only have 'ascend_by' or 'descend_by' prefixes"
+            )
+
     # This can be smarter
     # shorten to vov (FUTURE: replace with awkward)
     out = VectorOfVectors(
         flattened_data=out.flatten()[~np.isnan(out.flatten())],
         cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)),
     )
-    out_chs = VectorOfVectors(
-        flattened_data=out_chs.flatten()[~np.isnan(out_chs.flatten())].astype(int),
-        cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out_chs), axis=1)),
-    )
 
-    return {"values": out, "channels": out_chs}
+    return {"values": out}
 
 
 def build_evt(
             )
         )
 
-        pars, qry, defaultv = None, None, np.nan
+        pars, qry, defaultv, srter = None, None, np.nan, None
         if "parameters" in v.keys():
             pars = v["parameters"]
         if "query" in v.keys():
             qry = v["query"]
         if "initial" in v.keys() and not v["initial"] == "np.nan":
             defaultv = v["initial"]
+        if "sort" in v.keys():
+            srter = v["sort"]
 
         result = evaluate_expression(
             f_tcm,
             pars,
             qry,
             defaultv,
+            srter,
         )
 
         obj = result["values"]
diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py
index e7cae4e4d..e9a0b4e01 100644
--- a/tests/evt/test_build_evt.py
+++ b/tests/evt/test_build_evt.py
@@ -193,6 +193,55 @@ def test_query(lgnd_test_data, tmptestdir):
     assert len(ls(outfile, "/evt/")) == 12
 
 
+def test_vector_sort(lgnd_test_data, tmptestdir):
+    outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+    tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+    if os.path.exists(outfile):
+        os.remove(outfile)
+    f_tcm = lgnd_test_data.get_path(tcm_path)
+    f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp"))
+    f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit"))
+
+    conf = {
+        "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]},
+        "operations": {
+            "ascend_id": {
+                "channels": "geds_on",
+                "aggregation_mode": "vectorize",
+                "query": "hit.cuspEmax_ctc_cal>25",
+                "expression": "tcm.array_id",
+                "sort": "ascend_by:dsp.tp_0_est",
+            },
+            "t0_ascend": {
+                "aggregation_mode": "keep_at:evt.ascend_id",
+                "expression": "dsp.tp_0_est",
+            },
+            "descend_id": {
+                "channels": "geds_on",
+                "aggregation_mode": "vectorize",
+                "query": "hit.cuspEmax_ctc_cal>25",
+                "expression": "tcm.array_id",
+                "sort": "descend_by:dsp.tp_0_est",
+            },
+            "t0_descend": {
+                "aggregation_mode": "keep_at:evt.descend_id",
+                "expression": "dsp.tp_0_est",
+            },
+        },
+    }
+    build_evt(f_tcm, f_dsp, f_hit, outfile, conf)
+
+    assert os.path.exists(outfile)
+    assert len(ls(outfile, "/evt/")) == 4
+    lstore = store.LH5Store()
+    vov_t0, _ = lstore.read_object("/evt/t0_ascend", outfile)
+    nda_t0 = vov_t0.to_aoesa().nda
+    assert ((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all()
+    vov_t0, _ = lstore.read_object("/evt/t0_descend", outfile)
+    nda_t0 = vov_t0.to_aoesa().nda
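+    # descend_by sorting: t0 within each event must be non-increasing (NaN padding ignored)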
assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() + + def test_skimming(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" From 2247ccb878c4bbebeeaa9f63a4ba19b434ebfa83 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 1 Dec 2023 23:52:35 +0100 Subject: [PATCH 110/191] change aggregation mode vectorize to gather --- src/pygama/evt/build_evt.py | 19 +++++++++---------- .../module-test-t0-vov-evt-config.json | 4 ++-- tests/evt/configs/query-test-evt-config.json | 4 ++-- tests/evt/configs/vov-test-evt-config.json | 4 ++-- tests/evt/test_build_evt.py | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5cb25c711..fb2a73ed7 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -76,7 +76,7 @@ def evaluate_expression( - "any": aggregates by logical or. - "all": aggregates by logical and. - "keep_at:ch_field": aggregates according to passed ch_field - - "vectorize": Channels are not combined, but result saved as VectorOfVectors. + - "gather": Channels are not combined, but result saved as VectorOfVectors. qry A query that can mask the aggregation. expr @@ -236,7 +236,7 @@ def evaluate_expression( var_ph, defv, ) - elif "vectorize" == mode: + elif "gather" == mode: return evaluate_to_vector( idx, ids, @@ -1260,18 +1260,17 @@ def build_evt( "muon": "ch1027202", }, "operations": { - "energy":{ - "channels": "geds_on", - "aggregation_mode": "vectorize", - "query": "hit.cuspEmax_ctc_cal>25", - "expression": "hit.cuspEmax_ctc_cal" - }, "energy_id":{ "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", - "expression": "tcm.array_id" + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est" }, + "energy":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "hit.cuspEmax_ctc_cal>25" + } "is_muon_rejected":{ "channels": "muon", "aggregation_mode": "any", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 61782b01e..6479d4587 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -6,13 +6,13 @@ "operations": { "energy": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, "energy_id": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", "expression": "tcm.array_id" }, diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index 7998a496c..ff59e2a0b 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -73,14 +73,14 @@ }, "test_vov": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "query": "evt.multiplicity == 1", "expression": "True", "initial": false }, "test_vov2": { "channels": "geds_on", - "aggregation_mode": "vectorize", + "aggregation_mode": "gather", "expression": "True", "initial": false } diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index f5b3679bb..9b0b37078 100644 --- 
a/tests/evt/configs/vov-test-evt-config.json
+++ b/tests/evt/configs/vov-test-evt-config.json
@@ -5,13 +5,13 @@
   "operations": {
     "energy": {
       "channels": "geds_on",
-      "aggregation_mode": "vectorize",
+      "aggregation_mode": "gather",
       "query": "hit.cuspEmax_ctc_cal>25",
       "expression": "hit.cuspEmax_ctc_cal"
     },
     "energy_id": {
       "channels": "geds_on",
-      "aggregation_mode": "vectorize",
+      "aggregation_mode": "gather",
       "query": "hit.cuspEmax_ctc_cal>25",
       "expression": "tcm.array_id"
     },
diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py
index e9a0b4e01..43105a31d 100644
--- a/tests/evt/test_build_evt.py
+++ b/tests/evt/test_build_evt.py
@@ -207,7 +207,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir):
         "operations": {
             "ascend_id": {
                 "channels": "geds_on",
-                "aggregation_mode": "vectorize",
+                "aggregation_mode": "gather",
                 "query": "hit.cuspEmax_ctc_cal>25",
                 "expression": "tcm.array_id",
                 "sort": "ascend_by:dsp.tp_0_est",
@@ -218,7 +218,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir):
             },
             "descend_id": {
                 "channels": "geds_on",
-                "aggregation_mode": "vectorize",
+                "aggregation_mode": "gather",
                 "query": "hit.cuspEmax_ctc_cal>25",
                 "expression": "tcm.array_id",
                 "sort": "descend_by:dsp.tp_0_est",

From 8ac2ebdf9c1d8192329adde6997210e9f8736dea Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Sat, 2 Dec 2023 00:28:50 +0100
Subject: [PATCH 111/191] renaming of legend meta module

---
 src/pygama/evt/modules/{legend_meta.py => legend.py} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename src/pygama/evt/modules/{legend_meta.py => legend.py} (94%)

diff --git a/src/pygama/evt/modules/legend_meta.py b/src/pygama/evt/modules/legend.py
similarity index 94%
rename from src/pygama/evt/modules/legend_meta.py
rename to src/pygama/evt/modules/legend.py
index 8e98f6385..0bfe59d63 100644
--- a/src/pygama/evt/modules/legend_meta.py
+++ b/src/pygama/evt/modules/legend.py
@@ -1,5 +1,5 @@
 """
-Module for importing channel lists from LEGEND meta data
+Module providing LEGEND internal functions
 """
 from importlib import import_module

From b6ff699a93b79229d0e7cd0915aac3b059a38ddf Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Sat, 2 Dec 2023 13:43:17 +0100
Subject: [PATCH 112/191] Add skm tier

---
 src/pygama/skm/__init__.py                 |   7 +
 src/pygama/skm/build_skm.py                | 316 +++++++++++++++++++++
 tests/evt/configs/vov-test-evt-config.json |  13 +-
 tests/evt/test_build_evt.py                |   2 +-
 tests/skm/configs/basic-skm-config.json    |  35 +++
 tests/skm/test_build_skm.py                |  50 ++++
 6 files changed, 421 insertions(+), 2 deletions(-)
 create mode 100644 src/pygama/skm/__init__.py
 create mode 100644 src/pygama/skm/build_skm.py
 create mode 100644 tests/skm/configs/basic-skm-config.json
 create mode 100644 tests/skm/test_build_skm.py

diff --git a/src/pygama/skm/__init__.py b/src/pygama/skm/__init__.py
new file mode 100644
index 000000000..7b9ae88d2
--- /dev/null
+++ b/src/pygama/skm/__init__.py
@@ -0,0 +1,7 @@
+"""
+Utilities for reducing the evt tier to a flat skimmed table.
+"""
+
+from .build_skm import build_skm
+
+__all__ = ["build_skm"]
diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py
new file mode 100644
index 000000000..aace36501
--- /dev/null
+++ b/src/pygama/skm/build_skm.py
@@ -0,0 +1,316 @@
+"""
+This module implements routines to build the skm tier.
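+The skm tier reduces the evt tier to a flat table that can be written as parquet or hdf.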
+""" + +from __future__ import annotations + +import json +import logging +import os + +import awkward as ak +import h5py +import lgdo.lh5_store as store +import numpy as np +import pandas as pd +from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors + +log = logging.getLogger(__name__) + + +def vov_to_ak(vov: VectorOfVectors) -> ak.Array: + """ + Temporary function to convert VectorOfVectors to awkward arrays. This function will be removed soon. + + Parameters + ---------- + vov + VectorOfVectors to be converted. + """ + flattened_data = vov.flattened_data + cumulative_length = vov.cumulative_length + if isinstance(flattened_data, Array): + flattened_data = flattened_data.nda + if isinstance(cumulative_length, Array): + cumulative_length = cumulative_length.nda + + offsets = np.empty(len(cumulative_length) + 1, dtype=cumulative_length.dtype) + offsets[1:] = cumulative_length + offsets[0] = 0 + + layout = ak.contents.ListOffsetArray( + offsets=ak.index.Index(offsets), content=ak.contents.NumpyArray(flattened_data) + ) + return ak.Array(layout) + + +def vov_to_aoesa( + vov: VectorOfVectors, missing_value=np.nan, length: int = None +) -> ArrayOfEqualSizedArrays: + """ + Temporary function to convert VectorOfVectors to ArrayOfEqualSizedArrays. This function will be removed soon. + + Parameters + ---------- + vov + VectorOfVectors to be converted. + missing_value + missing value to be inserted. Determines the datatype of the output ArrayOfEqualSizedArrays + length + length of each row in the ArrayOfEqualSizedArrays. If the row in VectorOfVectors is shorter than length, the row gets padded with missing_value. If the row in VectorOfVectors is longer than length, the row gets clipped. + """ + arr = vov_to_ak(vov) + if length is not None: + max_len = length + else: + max_len = int(ak.max(ak.count(arr, axis=-1))) + return ArrayOfEqualSizedArrays( + nda=ak.fill_none(ak.pad_none(arr, max_len, clip=True), missing_value) + .to_numpy(allow_missing=False) + .astype(type(missing_value)), + attrs=vov.getattrs(), + ) + + +def build_skm( + f_evt: str | list, + f_skm: str, + skm_conf: dict | str, + wo_mode="w", + group: str = "/evt/", + skim_format: str = "parquet", +): + """ + Builds a skimmed file from a (set) of evt tier file(s). + + Parameters + ---------- + f_evt + list/path of evt file(s) + f_skm + name of the skm output file + skm_conf + name of JSON file or dict defining skm fields. multiplicity defines upto which row length VectorOfVector fields should be kept. Skimmed fields are forwarded from the evt tier and clipped/padded according to missing_value if needed. Global fields define an operation to reduce the dimension of VectorOfVector event fields. + For example: + + .. code-block::json + + { + "multiplicity": 2, + "index_field": "timestamp", + "skimmed_fields": { + "timestamp":{ + "evt_field": "timestamp" + }, + "is_muon_rejected":{ + "evt_field": "is_muon_rejected" + }, + "multiplicity":{ + "evt_field": "multiplicity" + }, + "energy":{ + "evt_field": "energy", + "missing_value": "np.nan" + }, + "energy_id":{ + "evt_field": "energy_id", + "missing_value": 0 + }, + "global_fields":{ + "energy_sum":{ + "aggregation_mode": "sum", + "evt_field": "energy" + }, + "is_all_physical":{ + "aggregation_mode": "all", + "evt_field": "is_physical" + }, + } + } + } + + wo_mode + writing mode. + - ``write_safe`` or ``w``: only proceed with writing if the file does not already exis. + - ``append`` or ``a``: append to file. + - ``overwrite`` or ``o``: replaces existing file. 
+ group + lh5 root group name of the evt tier + skim_format + data format of the skimmed output (hdf or parquet) + """ + + log = logging.getLogger(__name__) + log.info("Starting skimming") + log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") + tbl_cfg = skm_conf + if not isinstance(tbl_cfg, (str, dict)): + raise TypeError() + if isinstance(tbl_cfg, str): + with open(tbl_cfg) as f: + tbl_cfg = json.load(f) + + flds, flds_vov, flds_arr, multi = None, None, None, None + if "skimmed_fields" in tbl_cfg.keys(): + flds = tbl_cfg["skimmed_fields"].keys() + evt_flds = [(e, tbl_cfg["skimmed_fields"][e]["evt_field"]) for e in flds] + f = h5py.File(f_evt[0] if isinstance(f_evt, list) else f_evt, "r") + flds_vov = [ + x + for x in evt_flds + if x[1] + in [ + e.split("/")[-1] + for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + if "array<1>{array<1>{" in f[e].attrs.get("datatype") + ] + ] + flds_arr = [ + x + for x in evt_flds + if x not in flds_vov + and x[1] + in [ + e.split("/")[-1] + for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + ] + ] + + gflds = None + if "global_fields" in tbl_cfg.keys(): + gflds = list(tbl_cfg["global_fields"].keys()) + + if flds is None and gflds is None: + return + + # Check if multiplicity is given, if vector like fields are skimmed + if ( + isinstance(flds_vov, list) + and len(flds_vov) > 0 + and "multiplicity" not in tbl_cfg.keys() + ): + raise ValueError("If skiime fields are passed, multiplicity must be given") + + elif "multiplicity" in tbl_cfg.keys(): + multi = tbl_cfg["multiplicity"] + + # init pandas df + df = pd.DataFrame() + + # add array like fields + if isinstance(flds_arr, list): + log.debug("Crunching array-like fields") + df = df.join( + store.load_dfs(f_evt, [x[1] for x in flds_arr], group).rename( + columns={y: x for x, y in flds_arr} + ), + how="outer", + ) + + # take care of vector like fields + if isinstance(flds_vov, list): + log.debug("Processing VoV-like fields") + lstore = store.LH5Store() + for fld in flds_vov: + if "missing_value" not in tbl_cfg["skimmed_fields"][fld[0]].keys(): + raise ValueError( + f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" + ) + vls, _ = lstore.read_object(group + fld[1], f_evt) + mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] + if mv in ["np.inf", "-np.inf", "np.nan"]: + mv = eval(mv) + out = vov_to_aoesa(vls, missing_value=mv, length=multi).nda + nms = [fld[0] + f"_{e}" for e in range(multi)] + df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") + + # ok now build global fields if requested + if isinstance(gflds, list): + log.debug("Defining global fields") + for k in gflds: + if "aggregation_mode" not in tbl_cfg["global_fields"][k].keys(): + raise ValueError(f"global {k} operation needs aggregation mode") + if "evt_field" not in tbl_cfg["global_fields"][k].keys(): + raise ValueError(f"global {k} operation needs evt_field") + mode = tbl_cfg["global_fields"][k]["aggregation_mode"] + fld = tbl_cfg["global_fields"][k]["evt_field"] + + obj, _ = lstore.read_object(group + fld, f_evt) + if not isinstance(obj, VectorOfVectors): + raise ValueError( + f"global {k} operation not possible, since {fld} is not an VectorOfVectors" + ) + + obj_ak = vov_to_ak(obj) + if mode in [ + "sum", + "prod", + "nansum", + "nanprod", + "any", + "all", + "mean", + "std", + "var", + ]: + df = df.join( + pd.DataFrame( + data=getattr(ak, mode)(obj_ak, axis=-1).to_numpy( + allow_missing=False + ), + columns=[k], + ) + ) + + elif 
mode in ["min", "max"]: + val = getattr(ak, mode)(obj_ak, axis=-1, mask_identity=True) + if "missing_value" not in tbl_cfg["global_fields"][k].keys(): + raise ValueError( + f"global {k} {mode} operation needs a missing value assigned" + ) + mv = tbl_cfg["global_fields"][k]["missing_value"] + if mv in ["np.inf", "-np.inf"]: + mv = eval(mv) + val = ak.fill_none(val, mv) + df = df.join( + pd.DataFrame(data=val.to_numpy(allow_missing=False), columns=[k]) + ) + else: + raise ValueError("aggregation mode not supported") + + # Set an index column if specified + if "index_field" in tbl_cfg.keys(): + log.debug("Setting index") + if tbl_cfg["index_field"] in df.keys(): + df = df.set_index(tbl_cfg["index_field"]) + else: + raise ValueError( + "index field not found. Needs to be a previously defined skm field" + ) + + # last thing missing is writing it out + log.debug("saving skm file") + if skim_format not in ["parquet", "hdf"]: + raise ValueError("Not supported skim data format. Operations are hdf, parquet") + if wo_mode in ["w", "write_safe"]: + if os.path.exists(f_skm): + raise FileExistsError(f"Write_safe mode: {f_skm} exists.") + else: + if "hdf" == skim_format: + df.to_hdf(f_skm, key="df", mode="w") + elif "parquet" == skim_format: + df.to_parquet(f_skm) + elif wo_mode in ["o", "overwrite"]: + if "hdf" == skim_format: + df.to_hdf(f_skm, key="df", mode="w") + elif "parquet" == skim_format: + df.to_parquet(f_skm) + elif wo_mode in ["a", "append"]: + if "hdf" == skim_format: + df.to_hdf(f_skm, key="df", mode="a") + elif "parquet" == skim_format: + df.to_parquet(f_skm, append=True) + else: + raise ValueError(f"wo_mode {wo_mode} not valid.") + + log.info("done") diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 9b0b37078..cc0d129ce 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -1,8 +1,15 @@ { "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"] + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "ts_master": "ch1084803" }, "operations": { + "timestamp": { + "channels": "ts_master", + "aggregation_mode": "sum", + "expression": "dsp.timestamp", + "initial": 0.0 + }, "energy": { "channels": "geds_on", "aggregation_mode": "gather", @@ -26,6 +33,10 @@ "parameters": { "a": 25 }, "initial": 0 }, + "is_saturated": { + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "hit.is_saturated" + }, "energy_times_aoe": { "expression": "evt.energy*evt.aoe" }, diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 43105a31d..2cac630d8 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -115,7 +115,7 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 7 + assert len(ls(outfile, "/evt/")) == 9 lstore = store.LH5Store() vov_ene, _ = lstore.read_object("/evt/energy", outfile) vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json new file mode 100644 index 000000000..b1844ecb0 --- /dev/null +++ b/tests/skm/configs/basic-skm-config.json @@ -0,0 +1,35 @@ +{ + "multiplicity": 3, + "index_field": "timestamp", + "skimmed_fields": { + "timestamp": { + "evt_field": "timestamp" + }, + "multiplicity": { + "evt_field": "multiplicity" + }, + "energy": { + "evt_field": "energy", + "missing_value": "np.nan" + }, + "energy_id": { + "evt_field": "energy_id", + "missing_value": 0 + } + }, + 
"global_fields": { + "energy_sum": { + "aggregation_mode": "nansum", + "evt_field": "energy" + }, + "is_any_saturated": { + "aggregation_mode": "any", + "evt_field": "is_saturated" + }, + "max_energy": { + "aggregation_mode": "max", + "evt_field": "energy", + "missing_value": "np.inf" + } + } +} diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py new file mode 100644 index 000000000..984be7936 --- /dev/null +++ b/tests/skm/test_build_skm.py @@ -0,0 +1,50 @@ +import os +from pathlib import Path + +import numpy as np +import pandas as pd + +from pygama.evt import build_evt +from pygama.skm import build_skm + +config_dir = Path(__file__).parent / "configs" +evt_config_dir = Path(__file__).parent.parent / "evt" / "configs" + + +def test_basics(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + + skm_conf = f"{config_dir}/basic-skm-config.json" + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" + build_skm(outfile, skm_out, skm_conf, wo_mode="o") + + assert os.path.exists(skm_out) + df = pd.read_parquet(skm_out) + assert df.index.name == "timestamp" + assert "energy_0" in df.keys() + assert "energy_1" in df.keys() + assert "energy_2" in df.keys() + assert "multiplicity" in df.keys() + assert (df.multiplicity.to_numpy() <= 3).all() + assert ( + np.nan_to_num(df.energy_0.to_numpy()) + + np.nan_to_num(df.energy_1.to_numpy()) + + np.nan_to_num(df.energy_2.to_numpy()) + == df.energy_sum.to_numpy() + ).all() + assert (np.nan_to_num(df.energy_0.to_numpy()) <= df.max_energy.to_numpy()).all() + assert (np.nan_to_num(df.energy_1.to_numpy()) <= df.max_energy.to_numpy()).all() + assert (np.nan_to_num(df.energy_2.to_numpy()) <= df.max_energy.to_numpy()).all() From 985b35b12ccaf30950270bc91113510e951c3702 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 6 Dec 2023 13:28:51 +0100 Subject: [PATCH 113/191] generalized legend meta module --- src/pygama/evt/modules/legend.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py index 0bfe59d63..f12b81e6e 100644 --- a/src/pygama/evt/modules/legend.py +++ b/src/pygama/evt/modules/legend.py @@ -11,22 +11,23 @@ def legend_meta(params: dict) -> list: lm = import_module("legendmeta") lmeta = lm.LegendMetadata(path=params["meta_path"]) chmap = lmeta.channelmap(params["time_key"]) + tmp = [ f"ch{e}" for e in chmap.map("daq.rawid") if chmap.map("daq.rawid")[e]["system"] == params["system"] ] - if "usability" in params.keys(): - tmp = [ - e - for e in tmp - if chmap.map("daq.rawid")[int(e[2:])]["analysis"]["usability"] - == params["usability"] - ] - if "geds" == params["system"] and "type" in params.keys(): - tmp = [ - e - for e in tmp - if chmap.map("daq.rawid")[int(e[2:])]["type"] == params["type"] - ] + + if "selectors" in params.keys(): + for k in params["selectors"].keys(): + s = "" + for e in k.split("."): + s += f"['{e}']" + + tmp = [ + e + for e in tmp + if 
eval("dotter" + s, {"dotter": chmap.map("daq.rawid")[int(e[2:])]}) + == params["selectors"][k] + ] return tmp From ceac4db59c714cb516db5a09323347ae600e3d51 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 6 Dec 2023 15:26:54 +0100 Subject: [PATCH 114/191] small changes --- src/pygama/evt/build_evt.py | 19 +++++++++---------- tests/evt/configs/basic-evt-config.json | 3 --- tests/evt/configs/module-test-evt-config.json | 1 - .../module-test-t0-vov-evt-config.json | 1 - 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index fb2a73ed7..e43a713b8 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -152,7 +152,6 @@ def evaluate_expression( ids, f_hit, f_dsp, - chns, chns_rm, expr, exprl, @@ -902,7 +901,6 @@ def evaluate_at_channel( ids: np.ndarray, f_hit: str, f_dsp: str, - chns: list, chns_rm: list, expr: str, exprl: list, @@ -923,8 +921,6 @@ def evaluate_at_channel( Path to hit tier file f_dsp Path to dsp tier file - chns - list of channels to be aggregated chns_rm list of channels to be skipped from evaluation and set to default value expr @@ -939,26 +935,29 @@ def evaluate_at_channel( default value """ - out = np.full(len(ch_comp), defv, dtype=type(defv)) + out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) - for ch in chns: + for ch in np.unique(ch_comp.nda.astype(int)): + # skip default value + if f"ch{ch}" not in store.ls(f_hit): + continue # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == ch] res = get_data_at_channel( - ch, + f"ch{ch}", idx_ch, expr, exprl, var_ph, - ch not in chns_rm, + f"ch{ch}" not in chns_rm, f_hit, f_dsp, len(out), defv, ) - out[idx_ch] = np.where(int(ch[2:]) == ch_comp.nda, res, out[idx_ch]) + out = np.where(ch == ch_comp.nda, res, out) return {"values": out} diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 1fd0527cb..c68ea7cd6 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -51,19 +51,16 @@ "initial": 0.0 }, "is_usable_aoe": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "True", "initial": false }, "aoe": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 07262cc3d..6117ddd8d 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -19,7 +19,6 @@ "initial": 0 }, "t0": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_first_id", "expression": "dsp.tp_0_est", "initial": 0.0 diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 6479d4587..0b64d0e43 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -17,7 +17,6 @@ "expression": "tcm.array_id" }, "t0": { - "channels": "geds_on", "aggregation_mode": "keep_at:evt.energy_id", "expression": "dsp.tp_0_est", "initial": 0.0 From b8e87171548ecc5b444b60b82d8edbcfde19acfc Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 29 Dec 2023 14:35:50 +0100 Subject: 
[PATCH 115/191] add outputs field in configuration --- src/pygama/evt/build_evt.py | 30 ++++++++++++++++--- tests/evt/configs/basic-evt-config.json | 12 ++++++++ tests/evt/configs/module-test-evt-config.json | 12 ++++++++ .../module-test-t0-vov-evt-config.json | 12 ++++++++ tests/evt/configs/query-test-evt-config.json | 14 +++++++++ tests/evt/configs/vov-test-evt-config.json | 11 +++++++ tests/evt/test_build_evt.py | 2 ++ 7 files changed, 89 insertions(+), 4 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e43a713b8..47e1f0b16 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -8,6 +8,7 @@ import json import logging import os +import random import re from importlib import import_module @@ -1359,6 +1360,10 @@ def build_evt( log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) + + # Define temporary file + f_evt_tmp = f"{os.path.dirname(f_evt)}/{os.path.basename(f_evt).split('.')[0]}_tmp{random.randrange(9999):04d}.lh5" + for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) @@ -1366,8 +1371,8 @@ def build_evt( if "aggregation_mode" not in v.keys(): exprl = re.findall(r"(evt).([a-zA-Z_$][\w$]*)", v["expression"]) var = {} - if os.path.exists(f_evt): - var = load_vars_to_nda(f_evt, "", exprl) + if os.path.exists(f_evt_tmp): + var = load_vars_to_nda(f_evt_tmp, "", exprl) if "parameters" in v.keys(): var = var | v["parameters"] @@ -1391,7 +1396,7 @@ def build_evt( lstore.write_object( obj=res, name=group + k, - lh5_file=f_evt, + lh5_file=f_evt_tmp, wo_mode=wo_mode, ) @@ -1428,7 +1433,7 @@ def build_evt( result = evaluate_expression( f_tcm, - f_evt, + f_evt_tmp, f_hit, f_dsp, chns_e, @@ -1448,9 +1453,26 @@ def build_evt( lstore.write_object( obj=obj, name=group + k, + lh5_file=f_evt_tmp, + wo_mode=wo_mode, + ) + + # write output fields into f_evt and delete temporary file + if "outputs" in tbl_cfg.keys(): + if len(tbl_cfg["outputs"]) < 1: + log.warning("No output fields specified, no file will be written.") + for fld in tbl_cfg["outputs"]: + obj, _ = lstore.read_object(group + fld, f_evt_tmp) + lstore.write_object( + obj=obj, + name=group + fld, lh5_file=f_evt, wo_mode=wo_mode, ) + else: + log.warning("No output fields specified, no file will be written.") + + os.remove(f_evt_tmp) log.info("Done") diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index c68ea7cd6..8eb23adf2 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -2,6 +2,18 @@ "channels": { "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "multiplicity", + "energy", + "energy_id", + "energy_any_above1MeV", + "energy_all_above1MeV", + "energy_aux", + "energy_sum", + "is_usable_aoe", + "aoe", + "is_aoe_rejected" + ], "operations": { "multiplicity": { "channels": "geds_on", diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 6117ddd8d..595999d60 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -3,6 +3,18 @@ "spms_on": ["ch1057600", "ch1059201", "ch1062405"], "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "energy_first", + "energy_first_id", + "t0", + "lar_energy", + "lar_multiplicity", + "is_lar_rejected", + "lar_classifier", + "lar_energy_dplms", + "lar_multiplicity_dplms", + "lar_time_shift" + ], "operations": { "energy_first": { 
"channels": "geds_on", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 0b64d0e43..f1bf09a8e 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -3,6 +3,18 @@ "spms_on": ["ch1057600", "ch1059201", "ch1062405"], "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "energy", + "energy_id", + "t0", + "lar_energy", + "lar_multiplicity", + "is_lar_rejected", + "lar_classifier", + "lar_energy_dplms", + "lar_multiplicity_dplms", + "lar_time_shift" + ], "operations": { "energy": { "channels": "geds_on", diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json index ff59e2a0b..901d2d6c1 100644 --- a/tests/evt/configs/query-test-evt-config.json +++ b/tests/evt/configs/query-test-evt-config.json @@ -2,6 +2,20 @@ "channels": { "geds_on": ["ch1084803", "ch1084804", "ch1121600"] }, + "outputs": [ + "multiplicity", + "test_sum", + "test_first", + "test_first2", + "test_last", + "test_last2", + "test_any", + "test_any2", + "test_all", + "test_all2", + "test_vov", + "test_vov2" + ], "operations": { "multiplicity": { "channels": "geds_on", diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index cc0d129ce..ffdce3b31 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -3,6 +3,17 @@ "geds_on": ["ch1084803", "ch1084804", "ch1121600"], "ts_master": "ch1084803" }, + "outputs": [ + "timestamp", + "energy", + "energy_id", + "aoe", + "multiplicity", + "is_saturated", + "energy_times_aoe", + "energy_times_multiplicity", + "multiplicity_squared" + ], "operations": { "timestamp": { "channels": "ts_master", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 2cac630d8..838f76b38 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -161,6 +161,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "outputs": ["foo"], "operations": { "foo": { "channels": "geds_on", @@ -204,6 +205,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, + "outputs": ["acend_id", "t0_acend", "decend_id", "t0_decend"], "operations": { "acend_id": { "channels": "geds_on", From c70ec82e05d0788faba08c477b47ec9aae916292 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Jan 2024 18:34:09 +0100 Subject: [PATCH 116/191] LH5Store function renaming fixes --- src/pygama/evt/build_evt.py | 20 +++++------ src/pygama/skm/build_skm.py | 67 ++++--------------------------------- tests/evt/test_build_evt.py | 22 ++++++------ tests/skm/test_build_skm.py | 4 +-- 4 files changed, 29 insertions(+), 84 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 47e1f0b16..49c621cd0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -146,7 +146,7 @@ def evaluate_expression( in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] ): lstore = store.LH5Store() - ch_comp, _ = lstore.read_object(mode[8:].replace(".", "/"), f_evt) + ch_comp, _ = lstore.read(mode[8:].replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -336,7 +336,7 @@ def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> lstore = store.LH5Store() var 
= { - f"{e[0]}_{e[1]}": lstore.read_object( + f"{e[0]}_{e[1]}": lstore.read( f"{group.replace('/','')}/{e[0]}/{e[1]}", f, idx=idx, @@ -1393,7 +1393,7 @@ def build_evt( f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" ) - lstore.write_object( + lstore.write( obj=res, name=group + k, lh5_file=f_evt_tmp, @@ -1450,7 +1450,7 @@ def build_evt( obj = result["values"] if isinstance(obj, np.ndarray): obj = Array(result["values"]) - lstore.write_object( + lstore.write( obj=obj, name=group + k, lh5_file=f_evt_tmp, @@ -1462,8 +1462,8 @@ def build_evt( if len(tbl_cfg["outputs"]) < 1: log.warning("No output fields specified, no file will be written.") for fld in tbl_cfg["outputs"]: - obj, _ = lstore.read_object(group + fld, f_evt_tmp) - lstore.write_object( + obj, _ = lstore.read(group + fld, f_evt_tmp) + lstore.write( obj=obj, name=group + fld, lh5_file=f_evt, @@ -1493,7 +1493,7 @@ def skim_evt( f_evt input LH5 file of the evt level expression - skimming expression. Can contain variabels from event file or from the params dictionary. + skimming expression. Can contain variables from event file or from the params dictionary. f_out output LH5 file. Can be None if wo_mode is set to overwrite f_evt. wo_mode @@ -1519,7 +1519,7 @@ def skim_evt( for e in store.ls(f_evt, evt_group) if e.split("/")[-1] in exprl ] - var = {e: lstore.read_object(evt_group + e, f_evt)[0] for e in flds} + var = {e: lstore.read(evt_group + e, f_evt)[0] for e in flds} # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) arr_keys = [] @@ -1554,8 +1554,8 @@ def skim_evt( of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) for fld in fields: - ob, _ = lstore.read_object(fld, f_evt, idx=idx_list) - lstore.write_object( + ob, _ = lstore.read(fld, f_evt, idx=idx_list) + lstore.write( obj=ob, name=fld, lh5_file=of_tmp, diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index aace36501..51f025638 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -11,67 +11,12 @@ import awkward as ak import h5py import lgdo.lh5_store as store -import numpy as np import pandas as pd -from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors +from lgdo import VectorOfVectors log = logging.getLogger(__name__) -def vov_to_ak(vov: VectorOfVectors) -> ak.Array: - """ - Temporary function to convert VectorOfVectors to awkward arrays. This function will be removed soon. - - Parameters - ---------- - vov - VectorOfVectors to be converted. - """ - flattened_data = vov.flattened_data - cumulative_length = vov.cumulative_length - if isinstance(flattened_data, Array): - flattened_data = flattened_data.nda - if isinstance(cumulative_length, Array): - cumulative_length = cumulative_length.nda - - offsets = np.empty(len(cumulative_length) + 1, dtype=cumulative_length.dtype) - offsets[1:] = cumulative_length - offsets[0] = 0 - - layout = ak.contents.ListOffsetArray( - offsets=ak.index.Index(offsets), content=ak.contents.NumpyArray(flattened_data) - ) - return ak.Array(layout) - - -def vov_to_aoesa( - vov: VectorOfVectors, missing_value=np.nan, length: int = None -) -> ArrayOfEqualSizedArrays: - """ - Temporary function to convert VectorOfVectors to ArrayOfEqualSizedArrays. This function will be removed soon. - - Parameters - ---------- - vov - VectorOfVectors to be converted. - missing_value - missing value to be inserted. 
Determines the datatype of the output ArrayOfEqualSizedArrays - length - length of each row in the ArrayOfEqualSizedArrays. If the row in VectorOfVectors is shorter than length, the row gets padded with missing_value. If the row in VectorOfVectors is longer than length, the row gets clipped. - """ - arr = vov_to_ak(vov) - if length is not None: - max_len = length - else: - max_len = int(ak.max(ak.count(arr, axis=-1))) - return ArrayOfEqualSizedArrays( - nda=ak.fill_none(ak.pad_none(arr, max_len, clip=True), missing_value) - .to_numpy(allow_missing=False) - .astype(type(missing_value)), - attrs=vov.getattrs(), - ) - - def build_skm( f_evt: str | list, f_skm: str, @@ -131,7 +76,7 @@ def build_skm( wo_mode writing mode. - - ``write_safe`` or ``w``: only proceed with writing if the file does not already exis. + - ``write_safe`` or ``w``: only proceed with writing if the file does not already exists. - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. group @@ -216,11 +161,11 @@ def build_skm( raise ValueError( f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" ) - vls, _ = lstore.read_object(group + fld[1], f_evt) + vls, _ = lstore.read(group + fld[1], f_evt) mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] if mv in ["np.inf", "-np.inf", "np.nan"]: mv = eval(mv) - out = vov_to_aoesa(vls, missing_value=mv, length=multi).nda + out = vls.vov_to_aoesa(max_len=multi, fill_val=mv).nda nms = [fld[0] + f"_{e}" for e in range(multi)] df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") @@ -235,13 +180,13 @@ def build_skm( mode = tbl_cfg["global_fields"][k]["aggregation_mode"] fld = tbl_cfg["global_fields"][k]["evt_field"] - obj, _ = lstore.read_object(group + fld, f_evt) + obj, _ = lstore.read(group + fld, f_evt) if not isinstance(obj, VectorOfVectors): raise ValueError( f"global {k} operation not possible, since {fld} is not an VectorOfVectors" ) - obj_ak = vov_to_ak(obj) + obj_ak = obj.view_as("ak") if mode in [ "sum", "prod", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 838f76b38..f771579c5 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -117,12 +117,12 @@ def test_vov(lgnd_test_data, tmptestdir): assert os.path.exists(outfile) assert len(ls(outfile, "/evt/")) == 9 lstore = store.LH5Store() - vov_ene, _ = lstore.read_object("/evt/energy", outfile) - vov_aoe, _ = lstore.read_object("/evt/aoe", outfile) - arr_ac, _ = lstore.read_object("/evt/multiplicity", outfile) - vov_aoeene, _ = lstore.read_object("/evt/energy_times_aoe", outfile) - vov_eneac, _ = lstore.read_object("/evt/energy_times_multiplicity", outfile) - arr_ac2, _ = lstore.read_object("/evt/multiplicity_squared", outfile) + vov_ene, _ = lstore.read("/evt/energy", outfile) + vov_aoe, _ = lstore.read("/evt/aoe", outfile) + arr_ac, _ = lstore.read("/evt/multiplicity", outfile) + vov_aoeene, _ = lstore.read("/evt/energy_times_aoe", outfile) + vov_eneac, _ = lstore.read("/evt/energy_times_multiplicity", outfile) + arr_ac2, _ = lstore.read("/evt/multiplicity_squared", outfile) assert isinstance(vov_ene, VectorOfVectors) assert isinstance(vov_aoe, VectorOfVectors) assert isinstance(arr_ac, Array) @@ -236,10 +236,10 @@ def test_vector_sort(lgnd_test_data, tmptestdir): assert os.path.exists(outfile) assert len(ls(outfile, "/evt/")) == 4 lstore = store.LH5Store() - vov_t0, _ = lstore.read_object("/evt/t0_acend", outfile) + vov_t0, _ = lstore.read("/evt/t0_acend", outfile) nda_t0 = vov_t0.to_aoesa().nda assert 
((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all() - vov_t0, _ = lstore.read_object("/evt/t0_decend", outfile) + vov_t0, _ = lstore.read("/evt/t0_decend", outfile) nda_t0 = vov_t0.to_aoesa().nda assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() @@ -256,16 +256,16 @@ def test_skimming(lgnd_test_data, tmptestdir): build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) lstore = store.LH5Store() - ac = lstore.read_object("/evt/multiplicity", outfile)[0].nda + ac = lstore.read("/evt/multiplicity", outfile)[0].nda ac = len(ac[ac == 3]) outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") - assert ac == len(lstore.read_object("/evt/energy", outfile_skm)[0].to_aoesa().nda) + assert ac == len(lstore.read("/evt/energy", outfile_skm)[0].to_aoesa().nda) skim_evt(outfile, "multiplicity == 3", None, None, "o") - assert ac == len(lstore.read_object("/evt/energy", outfile)[0].to_aoesa().nda) + assert ac == len(lstore.read("/evt/energy", outfile)[0].to_aoesa().nda) with pytest.raises(ValueError): skim_evt(outfile, "multiplicity == 3", None, None, "bla") diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 984be7936..0b2beebe4 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -29,10 +29,10 @@ def test_basics(lgnd_test_data, tmptestdir): skm_conf = f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" - build_skm(outfile, skm_out, skm_conf, wo_mode="o") + build_skm(outfile, skm_out, skm_conf, wo_mode="o", skim_format="hdf") assert os.path.exists(skm_out) - df = pd.read_parquet(skm_out) + df = pd.read_hdf(skm_out) assert df.index.name == "timestamp" assert "energy_0" in df.keys() assert "energy_1" in df.keys() From c636d75cc9b0ac02afb98667dfff22069f2383bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Jan 2024 19:25:47 +0100 Subject: [PATCH 117/191] explicit numpy import --- src/pygama/skm/build_skm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 51f025638..b0be9d620 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -11,6 +11,7 @@ import awkward as ak import h5py import lgdo.lh5_store as store +import numpy as np import pandas as pd from lgdo import VectorOfVectors @@ -165,7 +166,7 @@ def build_skm( mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] if mv in ["np.inf", "-np.inf", "np.nan"]: mv = eval(mv) - out = vls.vov_to_aoesa(max_len=multi, fill_val=mv).nda + out = vls.to_aoesa(max_len=multi, fill_val=mv).nda nms = [fld[0] + f"_{e}" for e in range(multi)] df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") @@ -214,8 +215,10 @@ def build_skm( f"global {k} {mode} operation needs a missing value assigned" ) mv = tbl_cfg["global_fields"][k]["missing_value"] - if mv in ["np.inf", "-np.inf"]: - mv = eval(mv) + if mv == "np.inf": + mv = np.inf + elif mv == "-np.inf": + mv = -1 * np.inf val = ak.fill_none(val, mv) df = df.join( pd.DataFrame(data=val.to_numpy(allow_missing=False), columns=[k]) From 218330af55772348c33f82a02605e0e4d88dd403 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 4 Jan 2024 11:31:00 +0100 Subject: [PATCH 118/191] [ci] disable tqdm, should fix the random MacOS CI failures --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 
95dba9133..e5e817249 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,6 +12,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +env: + TQDM_MININTERVAL: 10 + jobs: build-and-test: From 4a73c935ca93b9ea2fdec7e4010a33edf3fcd0ca Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 14:48:27 +0100 Subject: [PATCH 119/191] Update to latest pydataobj version --- src/pygama/evt/build_evt.py | 75 +++++++++-------- src/pygama/evt/modules/spm.py | 154 +++++++++++++++++++--------------- src/pygama/skm/build_skm.py | 31 ++++--- 3 files changed, 143 insertions(+), 117 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 49c621cd0..5c9f291d4 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -12,9 +12,9 @@ import re from importlib import import_module -import lgdo.lh5_store as store import numpy as np -from lgdo import Array, VectorOfVectors +from lgdo import Array, VectorOfVectors, lh5 +from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) @@ -92,6 +92,8 @@ def evaluate_expression( can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`) """ + store = LH5Store() + # find parameters in evt file or in parameters exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} @@ -133,9 +135,8 @@ def evaluate_expression( qry_mask = eval(qry.replace("evt.", "evt_"), var_qry) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") # switch through modes if ( @@ -143,10 +144,9 @@ def evaluate_expression( and "keep_at:" == mode[:8] and "evt." 
== mode[8:][:4] and mode[8:].split(".")[-1] - in [e.split("/")[-1] for e in store.ls(f_evt, "/evt/")] + in [e.split("/")[-1] for e in lh5.ls(f_evt, "/evt/")] ): - lstore = store.LH5Store() - ch_comp, _ = lstore.read(mode[8:].replace(".", "/"), f_evt) + ch_comp, _ = store.read(mode[8:].replace(".", "/"), f_evt) if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -334,16 +334,16 @@ def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> list of parameter-tuples (root_group, field) to be found in f """ - lstore = store.LH5Store() + store = LH5Store() var = { - f"{e[0]}_{e[1]}": lstore.read( + f"{e[0]}_{e[1]}": store.read( f"{group.replace('/','')}/{e[0]}/{e[1]}", f, idx=idx, )[0] for e in exprl if e[1] - in [x.split("/")[-1] for x in store.ls(f, f"{group.replace('/','')}/{e[0]}/")] + in [x.split("/")[-1] for x in lh5.ls(f, f"{group.replace('/','')}/{e[0]}/")] } # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) @@ -536,6 +536,8 @@ def evaluate_to_first( out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) + store = LH5Store() + for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -562,12 +564,11 @@ def evaluate_to_first( outt[:] = np.inf # find if sorter is in hit or dsp - t0 = store.load_nda( + t0 = store.read( + f"{ch}/{sorter[0]}/{sorter[1]}", f_hit if "hit" == sorter[0] else f_dsp, - [sorter[1]], - f"{ch}/{sorter[0]}/", - idx_ch, - )[sorter[1]] + idx=idx_ch, + )[0].view_as("np") out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -629,6 +630,8 @@ def evaluate_to_last( out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) + store = LH5Store() + for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -652,12 +655,11 @@ def evaluate_to_last( # append to out according to mode == last # find if sorter is in hit or dsp - t0 = store.load_nda( + t0 = store.read( + f"{ch}/{sorter[0]}/{sorter[1]}", f_hit if "hit" == sorter[0] else f_dsp, - [sorter[1]], - f"{ch}/{sorter[0]}/", - idx_ch, - )[sorter[1]] + idx=idx_ch, + )[0].view_as("np") out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) @@ -940,7 +942,7 @@ def evaluate_at_channel( for ch in np.unique(ch_comp.nda.astype(int)): # skip default value - if f"ch{ch}" not in store.ls(f_hit): + if f"ch{ch}" not in lh5.ls(f_hit): continue # get index list for this channel to be loaded idx_ch = idx[ids == ch] @@ -1305,7 +1307,7 @@ def build_evt( lh5 root group in tcm file """ - lstore = store.LH5Store() + store = LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): raise TypeError() @@ -1355,8 +1357,9 @@ def build_evt( chns[k] = [e for e in v] nrows = len( - store.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] + lh5.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] ) + # nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) log.info( f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" ) @@ -1393,7 +1396,7 @@ def build_evt( f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" ) - lstore.write( + store.write( obj=res, name=group + k, lh5_file=f_evt_tmp, @@ -1450,7 +1453,7 @@ def build_evt( obj = result["values"] if 
isinstance(obj, np.ndarray): obj = Array(result["values"]) - lstore.write( + store.write( obj=obj, name=group + k, lh5_file=f_evt_tmp, @@ -1462,8 +1465,8 @@ def build_evt( if len(tbl_cfg["outputs"]) < 1: log.warning("No output fields specified, no file will be written.") for fld in tbl_cfg["outputs"]: - obj, _ = lstore.read(group + fld, f_evt_tmp) - lstore.write( + obj, _ = store.read(group + fld, f_evt_tmp) + store.write( obj=obj, name=group + fld, lh5_file=f_evt, @@ -1507,19 +1510,17 @@ def skim_evt( wo_mode + " is a invalid writing mode. Valid options are: 'o', 'overwrite','n','new'" ) - lstore = store.LH5Store() - fields = store.ls(f_evt, evt_group) - nrows = lstore.read_n_rows(fields[0], f_evt) + store = LH5Store() + fields = lh5.ls(f_evt, evt_group) + nrows = store.read_n_rows(fields[0], f_evt) # load fields in expression exprl = re.findall(r"[a-zA-Z_$][\w$]*", expression) var = {} flds = [ - e.split("/")[-1] - for e in store.ls(f_evt, evt_group) - if e.split("/")[-1] in exprl + e.split("/")[-1] for e in lh5.ls(f_evt, evt_group) if e.split("/")[-1] in exprl ] - var = {e: lstore.read(evt_group + e, f_evt)[0] for e in flds} + var = {e: store.read(evt_group + e, f_evt)[0] for e in flds} # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) arr_keys = [] @@ -1554,8 +1555,8 @@ def skim_evt( of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) for fld in fields: - ob, _ = lstore.read(fld, f_evt, idx=idx_list) - lstore.write( + ob, _ = store.read(fld, f_evt, idx=idx_list) + store.write( obj=ob, name=fld, lh5_file=of_tmp, diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 7bd530531..0907b3a13 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -11,9 +11,9 @@ import warnings -import lgdo.lh5_store as store import numpy as np from lgdo import Array, VectorOfVectors +from lgdo.lh5 import LH5Store # get LAr energy per event over all channels @@ -35,21 +35,25 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "np" ) mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - pes = df["energy_in_pe"] + pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -76,21 +80,24 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = 
store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos"], ch + "/hit/", idx_ch + energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "np" ) mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - pes = df["energy_in_pe"] + pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -118,21 +125,24 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = trig + tmax sum = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch - ) + energy_in_pe_dplms = store.read( + f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch + )[0].view_as("np") + trigger_pos_dplms = store.read( + f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch + )[0].view_as("np") mask = ( - (df["trigger_pos_dplms"] < tma[:, None] / 16) - & (df["trigger_pos_dplms"] > tmi[:, None] / 16) - & (df["energy_in_pe_dplms"] > lim) + (trigger_pos_dplms < tma[:, None] / 16) + & (trigger_pos_dplms > tmi[:, None] / 16) + & (energy_in_pe_dplms > lim) ) - pes = df["energy_in_pe_dplms"] + pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -159,21 +169,24 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax tma = trig + tmax maj = np.zeros(len(trig)) # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - df = store.load_nda( - f_hit, ["energy_in_pe_dplms", "trigger_pos_dplms"], ch + "/hit/", idx_ch - ) + energy_in_pe_dplms = store.read( + f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch + )[0].view_as("np") + trigger_pos_dplms = store.read( + f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch + )[0].view_as("np") mask = ( - (df["trigger_pos_dplms"] < tma[:, None] / 16) - & (df["trigger_pos_dplms"] > tmi[:, None] / 16) - & (df["energy_in_pe_dplms"] > lim) + (trigger_pos_dplms < tma[:, None] / 16) + & (trigger_pos_dplms > tmi[:, None] / 16) + & (energy_in_pe_dplms > lim) ) - pes = df["energy_in_pe_dplms"] + pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) @@ -188,9 +201,10 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, 
tdefault, tmin, tmax, swin, tra warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") warnings.filterwarnings("ignore", r"invalid value encountered in divide") - predf = store.load_nda(f_hit, ["energy_in_pe", "timestamp"], chs[0] + "/hit/") + store = LH5Store() + energy_in_pe, _ = store.read(f"{chs[0]}/hit/energy_in_pe", f_hit) - peshape = (predf["energy_in_pe"]).shape + peshape = energy_in_pe.view_as("np").shape # 1D = channel, 2D = event num, 3D = array per event pes = np.zeros([len(chs), peshape[0], peshape[1]]) times = np.zeros([len(chs), peshape[0], peshape[1]]) @@ -213,25 +227,25 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] - df = store.load_nda( - f_hit, - ["energy_in_pe", "trigger_pos", "timestamp"], - chs[i] + "/hit/", - idx_ch, - ) + energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - pe = df["energy_in_pe"] - time = df["trigger_pos"] * 16 + pe = energy_in_pe + time = trigger_pos * 16 pe = np.where(mask, pe, np.nan) time = np.where(mask, time, np.nan) @@ -271,8 +285,12 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): - predf = store.load_nda(f_hit, ["energy_in_pe"], chs[0] + "/hit/") - peshape = (predf["energy_in_pe"]).shape + store = LH5Store() + energy_in_pe, _ = store.read( + f"{chs[0]}/hit/energy_in_pe", + f_hit, + ) + peshape = energy_in_pe.view_as("np").shape times = np.zeros([len(chs), peshape[0], peshape[1]]) tge = trgr @@ -293,22 +311,24 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): tma = tge + tmax # load TCM data to define an event - nda = store.load_nda(f_tcm, ["array_id", "array_idx"], "hardware_tcm_1/") - ids = nda["array_id"] - idx = nda["array_idx"] + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] - df = store.load_nda( - f_hit, ["energy_in_pe", "trigger_pos"], chs[i] + "/hit/", idx_ch - ) + energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") + trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") mask = ( - (df["trigger_pos"] < tma[:, None] / 16) - & (df["trigger_pos"] > tmi[:, None] / 16) - & (df["energy_in_pe"] > lim) + (trigger_pos < tma[:, None] / 16) + & (trigger_pos > tmi[:, None] / 16) + & (energy_in_pe > lim) ) - time = df["trigger_pos"] * 16 + time = trigger_pos * 16 time = np.where(mask, time, np.nan) times[i][idx_ch] = time diff --git 
a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index b0be9d620..7f5d2de3b 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -10,10 +10,10 @@ import awkward as ak import h5py -import lgdo.lh5_store as store import numpy as np import pandas as pd -from lgdo import VectorOfVectors +from lgdo import VectorOfVectors, lh5 +from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) @@ -107,7 +107,7 @@ def build_skm( if x[1] in [ e.split("/")[-1] - for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) if "array<1>{array<1>{" in f[e].attrs.get("datatype") ] ] @@ -118,7 +118,7 @@ def build_skm( and x[1] in [ e.split("/")[-1] - for e in store.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) + for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) ] ] @@ -142,27 +142,32 @@ def build_skm( # init pandas df df = pd.DataFrame() + store = LH5Store() # add array like fields if isinstance(flds_arr, list): log.debug("Crunching array-like fields") - df = df.join( - store.load_dfs(f_evt, [x[1] for x in flds_arr], group).rename( - columns={y: x for x, y in flds_arr} - ), - how="outer", - ) + + _df = store.read( + group, + f_evt, + field_mask=[x[1] for x in flds_arr], + )[ + 0 + ].view_as("pd") + + _df = _df.rename(columns={y: x for x, y in flds_arr}) + df = df.join(_df, how="outer") # take care of vector like fields if isinstance(flds_vov, list): log.debug("Processing VoV-like fields") - lstore = store.LH5Store() for fld in flds_vov: if "missing_value" not in tbl_cfg["skimmed_fields"][fld[0]].keys(): raise ValueError( f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" ) - vls, _ = lstore.read(group + fld[1], f_evt) + vls, _ = store.read(group + fld[1], f_evt) mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] if mv in ["np.inf", "-np.inf", "np.nan"]: mv = eval(mv) @@ -181,7 +186,7 @@ def build_skm( mode = tbl_cfg["global_fields"][k]["aggregation_mode"] fld = tbl_cfg["global_fields"][k]["evt_field"] - obj, _ = lstore.read(group + fld, f_evt) + obj, _ = store.read(group + fld, f_evt) if not isinstance(obj, VectorOfVectors): raise ValueError( f"global {k} operation not possible, since {fld} is not an VectorOfVectors" From a0cda34a29aa93ec54220d0bacceefa3381acde5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 15:29:18 +0100 Subject: [PATCH 120/191] Bug fix in build_evt tests --- src/pygama/evt/build_evt.py | 24 ++++++++++++------------ tests/evt/test_build_evt.py | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5c9f291d4..faea920cc 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1397,9 +1397,9 @@ def build_evt( ) store.write( - obj=res, - name=group + k, - lh5_file=f_evt_tmp, + res, + group + k, + f_evt_tmp, wo_mode=wo_mode, ) @@ -1454,9 +1454,9 @@ def build_evt( if isinstance(obj, np.ndarray): obj = Array(result["values"]) store.write( - obj=obj, - name=group + k, - lh5_file=f_evt_tmp, + obj, + group + k, + f_evt_tmp, wo_mode=wo_mode, ) @@ -1467,9 +1467,9 @@ def build_evt( for fld in tbl_cfg["outputs"]: obj, _ = store.read(group + fld, f_evt_tmp) store.write( - obj=obj, - name=group + fld, - lh5_file=f_evt, + obj, + group + fld, + f_evt, wo_mode=wo_mode, ) else: @@ -1557,9 +1557,9 @@ def skim_evt( for fld in fields: ob, _ = store.read(fld, f_evt, idx=idx_list) store.write( - obj=ob, - 
name=fld, - lh5_file=of_tmp, + ob, + fld, + of_tmp, wo_mode="o", ) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index f771579c5..94e4dfb58 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -145,7 +145,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): with pytest.raises(RuntimeError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config) - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): build_evt(f_tcm, f_hit, f_dsp, outfile, f_config) with pytest.raises(TypeError): From ebdcd9c05e2e7d29e005496078d141459c650407 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 16:52:08 +0100 Subject: [PATCH 121/191] Docstring cosmetics --- src/pygama/evt/build_evt.py | 520 +++++++++++++++++++----------------- src/pygama/hit/build_hit.py | 16 +- src/pygama/skm/build_skm.py | 95 ++++--- 3 files changed, 333 insertions(+), 298 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index faea920cc..d36255be8 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1,5 +1,5 @@ """ -This module implements routines to build the evt tier. +This module implements routines to build the `evt` tier. """ from __future__ import annotations @@ -52,44 +52,59 @@ def evaluate_expression( defv=np.nan, sorter: str = None, ) -> dict: - """ - Evaluates the expression defined by the user across all channels according to the mode + """Evaluates the expression defined by the user across all channels + according to the mode. Parameters ---------- f_tcm - Path to tcm tier file + path to `tcm` tier file. f_evt - Path to event tier file + path to `evt` tier file. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - List of channel names across which expression gets evaluated (form: "ch") + list of channel names across which expression gets evaluated (form: + ``ch``). chns_rm - List of channels which get set to default value during evaluation. In function mode they are removed entirely (form: "ch") + list of channels which get set to default value during evaluation. In + function mode they are removed entirely (form: ``ch``) mode - The mode determines how the event entry is calculated across channels. Options are: - - "first_at:sorter": aggregates across channels by returning the expression of the channel with smallest value of sorter. - - "last_at": aggregates across channels by returning the expression of the channel with largest value of sorter. - - "sum": aggregates by summation. - - "any": aggregates by logical or. - - "all": aggregates by logical and. - - "keep_at:ch_field": aggregates according to passed ch_field - - "gather": Channels are not combined, but result saved as VectorOfVectors. + The mode determines how the event entry is calculated across channels. + Options are: + + - ``first_at:sorter``: aggregates across channels by returning the + expression of the channel with smallest value of sorter. + - ``last_at``: aggregates across channels by returning the expression of + the channel with largest value of sorter. + - ``sum``: aggregates by summation. + - ``any``: aggregates by logical or. + - ``all``: aggregates by logical and. + - ``keep_at:ch_field``: aggregates according to passed ch_field + - ``gather``: Channels are not combined, but result saved as + :class:`.VectorOfVectors`. + qry - A query that can mask the aggregation. + a query that can mask the aggregation. expr - The expression. 
That can be any mathematical equation/comparison. If mode == function, the expression needs to be a special processing function defined in modules (e.g. "modules.spm.get_energy). In the expression parameters from either hit, dsp, evt tier (from operations performed before this one! --> JSON operations order matters), or from the "parameters" field can be used. + the expression. That can be any mathematical equation/comparison. If + `mode` is ``function``, the expression needs to be a special processing + function defined in modules (e.g. :func:`.modules.spm.get_energy`). In + the expression parameters from either hit, dsp, evt tier (from + operations performed before this one! Dictionary operations order + matters), or from the ``parameters`` field can be used. nrows - Number of rows to be processed. + number of rows to be processed. para - Dictionary of parameters defined in the "parameters" field in the configuration JSON file. + dictionary of parameters defined in the ``parameters`` field in the + configuration dictionary. defv - default value of evaluation + default value of evaluation. sorter - can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`) + can be used to sort vector outputs according to sorter expression (see + :func:`evaluate_to_vector`). """ store = LH5Store() @@ -293,21 +308,21 @@ def find_parameters( idx_ch: np.ndarray, exprl: list, ) -> dict: - """ - Wraps :func:`load_vars_to_nda` to return parameters from hit and dsp tiers. + """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` + tiers. Parameters ---------- f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. ch - rawid in the tiers + "rawid" in the tiers. idx_ch - index array of entries to be read from files + index array of entries to be read from files. exprl - list of tuples (tier, field) to be found in the hit/dsp tiers + list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. """ # find fields in either dsp, hit @@ -318,20 +333,19 @@ def find_parameters( def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> dict: - """ - Maps parameter expressions to parameters if found in f. - Blows up VectorOfVectors to ArrayOfEqualSizedArrays. + """Maps parameter expressions to parameters if found in `f`. + Blows up :class:`.VectorOfVectors` to :class:`.ArrayOfEqualSizedArrays`. Parameters ---------- f - Path to a LGDO file + path to a LGDO file. group - additional group in f + additional group in `f`. idx - index array of entries to be read from files + index array of entries to be read from files. exprl - list of parameter-tuples (root_group, field) to be found in f + list of parameter-tuples ``(root_group, field)`` to be found in `f`. """ store = LH5Store() @@ -382,31 +396,31 @@ def get_data_at_channel( outsize: int, defv, ) -> np.ndarray: - """ - Evaluates an expression and returns the result + """Evaluates an expression and returns the result. Parameters ---------- ch - rawid of channel to be evaluated + "rawid" of channel to be evaluated. idx_ch - array of indices to be evaluated + array of indices to be evaluated. expr - expression to be evaluated + expression to be evaluated. exprl - list of parameter-tuples (root_group, field) found in the expression + list of parameter-tuples ``(root_group, field)`` found in the expression. 
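+        For example, the expression ``"hit.cuspEmax_ctc_cal>25"`` used in
+        the test configurations yields ``[("hit", "cuspEmax_ctc_cal")]``,
+        as produced by the regular expression in :func:`evaluate_expression`.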
var_ph - dict of additional parameters that are not channel dependent + dict of additional parameters that are not channel dependent. is_evaluated - if false, the expression does not get evaluated but an array of default values is returned + if false, the expression does not get evaluated but an array of default + values is returned. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. outsize - size of the return array + size of the return array. defv - default value + default value. """ if not is_evaluated: @@ -444,23 +458,22 @@ def get_mask_from_query( f_hit: str, f_dsp: str, ) -> np.ndarray: - """ - Evaluates an query expression and returns a mask accordingly + """Evaluates a query expression and returns a mask accordingly. Parameters ---------- qry - query expression + query expression. length - length of the return mask + length of the return mask. ch - rawid of channel to be evaluated + "rawid" of channel to be evaluated. idx_ch - array of indices to be evaluated + array of indices to be evaluated. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. """ # get sub evt based query condition if needed @@ -498,37 +511,37 @@ def evaluate_to_first( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates across channels by returning the expression of the channel with smallest value of sorter. + """Aggregates across channels by returning the expression of the channel + with smallest value of `sorter`. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. sorter - tuple of field in hit/dsp/evt tier to evaluate (tier,field) + tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -592,37 +605,37 @@ def evaluate_to_last( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates across channels by returning the expression of the channel with largest value of sorter. + """Aggregates across channels by returning the expression of the channel + with largest value of `sorter`. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. 
expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of dsp/hit/evt parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. sorter - tuple of field in hit/dsp/evt tier to evaluate (tier,field) + tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -682,35 +695,34 @@ def evaluate_to_tot( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by summation across channels. + """Aggregates by summation across channels. Parameters ---------- idx - tcm index array + tcm index array. ids - tcm id array + tcm id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of dsp/hit/evt parameter tuples in expression (tier, field). qry - query expression to mask aggregation + query expression to mask aggregation. nrows length of output array var_ph - dictionary of evt and additional parameters and their values + dictionary of evt and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -759,35 +771,35 @@ def evaluate_to_any( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by logical or operation across channels. If the expression evaluates to a non boolean value it is casted to bool. + """Aggregates by logical or operation across channels. If the expression + evaluates to a non boolean value it is casted to boolean. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -836,35 +848,35 @@ def evaluate_to_all( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by logical and operation across channels. If the expression evaluates to a non boolean value it is casted to bool. + """Aggregates by logical and operation across channels. 
If the expression + evaluates to a non boolean value it is casted to boolean. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output array + length of output array. var_ph - dictionary of evt and additional parameters and their values + dictionary of evt and additional parameters and their values. defv - default value + default value. """ # define dimension of output array @@ -911,31 +923,30 @@ def evaluate_at_channel( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - aggregates by evaluating the expression at a given channel + """Aggregates by evaluating the expression at a given channel. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. ch_comp - array of rawids at which the expression is evaluated + array of rawids at which the expression is evaluated. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) @@ -977,31 +988,31 @@ def evaluate_at_channel_vov( var_ph: dict = None, defv=np.nan, ) -> dict: - """ - same as :func:`evaluate_at_channel` but evaluates expression at non flat channels VectorOfVectors. + """Same as :func:`evaluate_at_channel` but evaluates expression at non + flat channels :class:`.VectorOfVectors`. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. ch_comp - array of rawids at which the expression is evaluated + array of "rawid"s at which the expression is evaluated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. 
""" # blow up vov to aoesa @@ -1055,41 +1066,41 @@ def evaluate_to_aoesa( defv=np.nan, missv=np.nan, ) -> np.ndarray: - """ - Aggregates by returning a ArrayOfEqualSizedArrays of evaluated expressions of channels that fulfill a query expression. + """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated + expressions of channels that fulfill a query expression. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output VectorOfVectors + length of output :class:`.VectorOfVectors`. ch_comp - array of rawids at which the expression is evaluated + array of "rawid"s at which the expression is evaluated. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. missv - missing value + missing value. sorter - sorts the entries in the vector according to sorter expression + sorts the entries in the vector according to sorter expression. """ # define dimension of output array out = np.full((nrows, len(chns)), missv) @@ -1138,39 +1149,41 @@ def evaluate_to_vector( defv=np.nan, sorter: str = None, ) -> dict: - """ - Aggregates by returning a VectorOfVector of evaluated expressions of channels that fulfill a query expression. + """Aggregates by returning a :class:`.VectorOfVector` of evaluated + expressions of channels that fulfill a query expression. Parameters ---------- idx - tcm index array + `tcm` index array. ids - tcm id array + `tcm` id array. f_hit - Path to hit tier file + path to `hit` tier file. f_dsp - Path to dsp tier file + path to `dsp` tier file. chns - list of channels to be aggregated + list of channels to be aggregated. chns_rm - list of channels to be skipped from evaluation and set to default value + list of channels to be skipped from evaluation and set to default value. expr - expression string to be evaluated + expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier,field) + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry - query expression to mask aggregation + query expression to mask aggregation. nrows - length of output VectorOfVectors + length of output :class:`.VectorOfVectors`. ch_comp - array of rawids at which the expression is evaluated + array of "rawids" at which the expression is evaluated. var_ph - dictionary of evt and additional parameters and their values + dictionary of `evt` and additional parameters and their values. defv - default value + default value. sorter - sorts the entries in the vector according to sorter expression. acend_by: results in an vector ordered ascending, decend_by: sorts descending + sorts the entries in the vector according to sorter expression. 
+        ``ascend_by:`` results in a vector ordered ascending,
+        ``decend_by:`` sorts descending.
     """
     out = evaluate_to_aoesa(
         idx,
@@ -1233,80 +1246,88 @@ def build_evt(
     group: str = "/evt/",
     tcm_group: str = "/hardware_tcm_1/",
 ) -> None:
-    """
-    Transform data from the hit and dsp levels which a channel sorted
-    to a event sorted data format
+    """Transform data from the `hit` and `dsp` levels, which is channel sorted,
+    to an event sorted data format.
 
     Parameters
     ----------
     f_tcm
-        input LH5 file of the tcm level
+        input LH5 file of the tcm level.
     f_dsp
-        input LH5 file of the dsp level
+        input LH5 file of the dsp level.
     f_hit
-        input LH5 file of the hit level
-
+        input LH5 file of the hit level.
     f_evt
-        name of the output file
+        name of the output file.
     evt_config
-        name of JSON file or dict defining evt fields. Channel lists can be defined by importing a meta module. The "operations" dictionary defines the fields (name=key), where "channels" specifies the channels used to for this field (either a string or a list of strings), "aggregation_mode" defines how the channels should be combined (see evaluate_expression). "expression" defnies the mathematical/special function to apply (see evaluate_expression),
-        "query" defines an expression to mask the aggregation.
-        "parameters" defines any other parameter used in expression. For example:
+        name of configuration file or dictionary defining event fields. Channel
+        lists can be defined by importing a metadata module.
+
+        - ``operations`` defines the fields ``name=key``, where ``channels``
+          specifies the channels used for this field (either a string or a
+          list of strings),
+        - ``aggregation_mode`` defines how the channels should be combined (see
+          :func:`evaluate_expression`).
+        - ``expression`` defines the mathematical/special function to apply
+          (see :func:`evaluate_expression`),
+        - ``query`` defines an expression to mask the aggregation.
+        - ``parameters`` defines any other parameter used in expression.
 
-        .. code-block::json
+        For example:
+
+        .. 
code-block:: json { - "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"], - "spms_on": ["ch1057600", "ch1059201", "ch1062405"], - "muon": "ch1027202", + "channels": { + "geds_on": ["ch1084803", "ch1084804", "ch1121600"], + "spms_on": ["ch1057600", "ch1059201", "ch1062405"], + "muon": "ch1027202", + }, + "operations": { + "energy_id":{ + "channels": "geds_on", + "aggregation_mode": "gather", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est" }, - "operations": { - "energy_id":{ - "channels": "geds_on", - "aggregation_mode": "gather", - "query": "hit.cuspEmax_ctc_cal>25", - "expression": "tcm.array_id", - "sort": "ascend_by:dsp.tp_0_est" - }, - "energy":{ - "aggregation_mode": "keep_at:evt.energy_id", - "expression": "hit.cuspEmax_ctc_cal>25" - } - "is_muon_rejected":{ - "channels": "muon", - "aggregation_mode": "any", - "expression": "dsp.wf_max>a", - "parameters": {"a":15100}, - "initial": false - }, - "multiplicity":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], - "aggregation_mode": "sum", - "expression": "hit.cuspEmax_ctc_cal > a", - "parameters": {"a":25}, - "initial": 0 - }, - "t0":{ - "aggregation_mode": "keep_at:evt.energy_id", - "expression": "dsp.tp_0_est" - }, - "lar_energy":{ - "channels": "spms_on", - "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" - }, + "energy":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "hit.cuspEmax_ctc_cal>25" } + "is_muon_rejected":{ + "channels": "muon", + "aggregation_mode": "any", + "expression": "dsp.wf_max>a", + "parameters": {"a":15100}, + "initial": false + }, + "multiplicity":{ + "channels": ["geds_on","geds_no_psd","geds_ac"], + "aggregation_mode": "sum", + "expression": "hit.cuspEmax_ctc_cal > a", + "parameters": {"a":25}, + "initial": 0 + }, + "t0":{ + "aggregation_mode": "keep_at:evt.energy_id", + "expression": "dsp.tp_0_est" + }, + "lar_energy":{ + "channels": "spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + }, + } } wo_mode - writing mode + writing mode. group - lh5 root group name + LH5 root group name. tcm_group - lh5 root group in tcm file + LH5 root group in tcm file. """ - store = LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): @@ -1488,21 +1509,23 @@ def skim_evt( wo_mode="n", evt_group="/evt/", ) -> None: - """ - Skimms events from a evt file which are fullfling the expression, discards all other events. + """Skims events from an `evt` file which are fulfilling the expression, + discards all other events. Parameters ---------- f_evt - input LH5 file of the evt level + input LH5 file of the `evt` level. expression - skimming expression. Can contain variables from event file or from the params dictionary. + skimming expression. Can contain variables from event file or from the + `params` dictionary. f_out - output LH5 file. Can be None if wo_mode is set to overwrite f_evt. + output LH5 file. Can be ``None`` if `wo_mode` is set to overwrite `f_evt`. wo_mode - Write mode: "o"/"overwrite" overwrites f_evt. "n"/"new" writes to a new file specified in f_out. + Write mode: ``o``/``overwrite`` overwrites f_evt. ``n``/``new`` writes + to a new file specified in `f_out`. evt_group - lh5 root group of the evt file + LH5 root group of the `evt` file. 
""" if wo_mode not in ["o", "overwrite", "n", "new"]: @@ -1543,7 +1566,8 @@ def skim_evt( if res.shape != (nrows,): raise ValueError( - f"The expression must result to 1D with length = event number. Current shape is {res.shape}" + "The expression must result to 1D with length = event number. " + f"Current shape is {res.shape}" ) res = res.astype(bool) diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py index 2b3e8ef5f..2a6d6a066 100644 --- a/src/pygama/hit/build_hit.py +++ b/src/pygama/hit/build_hit.py @@ -48,14 +48,14 @@ def build_hit( .. code-block:: json { - "outputs": ["calE", "AoE"], - "operations": { - "calE": { - "expression": "sqrt(a + b * trapEmax**2)", - "parameters": {"a": "1.23", "b": "42.69"}, - }, - "AoE": {"expression": "A_max/calE"}, - } + "outputs": ["calE", "AoE"], + "operations": { + "calE": { + "expression": "sqrt(a + b * trapEmax**2)", + "parameters": {"a": "1.23", "b": "42.69"}, + }, + "AoE": {"expression": "A_max/calE"}, + } } The ``outputs`` array lists columns that will be effectively written in diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 7f5d2de3b..8bad1050e 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -1,5 +1,6 @@ """ -This module implements routines to build the evt tier. +This module implements routines to build the `skm` tier, consisting of skimmed +data from the `evt` tier. """ from __future__ import annotations @@ -25,67 +26,77 @@ def build_skm( wo_mode="w", group: str = "/evt/", skim_format: str = "parquet", -): - """ - Builds a skimmed file from a (set) of evt tier file(s). +) -> None: + """Builds a skimmed file from a (set) of evt tier file(s). Parameters ---------- f_evt - list/path of evt file(s) + list/path of `evt` file(s). f_skm - name of the skm output file + name of the `skm` output file. skm_conf - name of JSON file or dict defining skm fields. multiplicity defines upto which row length VectorOfVector fields should be kept. Skimmed fields are forwarded from the evt tier and clipped/padded according to missing_value if needed. Global fields define an operation to reduce the dimension of VectorOfVector event fields. + name of configuration file or dictionary defining `skm` fields. + + - ``multiplicity`` defines up to which row length + :class:`.VectorOfVector` fields should be kept. + - ``index_field`` + - ``skimmed_fields`` are forwarded from the evt tier and clipped/padded + according to ``missing_value`` if needed. + - ``global_fields`` defines an operation to reduce the dimension of + :class:`.VectorOfVector` event fields. + For example: - .. code-block::json + .. 
code-block:: json { - "multiplicity": 2, - "index_field": "timestamp", - "skimmed_fields": { - "timestamp":{ - "evt_field": "timestamp" - }, - "is_muon_rejected":{ - "evt_field": "is_muon_rejected" - }, - "multiplicity":{ - "evt_field": "multiplicity" - }, - "energy":{ - "evt_field": "energy", - "missing_value": "np.nan" - }, - "energy_id":{ - "evt_field": "energy_id", - "missing_value": 0 - }, - "global_fields":{ - "energy_sum":{ - "aggregation_mode": "sum", - "evt_field": "energy" - }, - "is_all_physical":{ - "aggregation_mode": "all", - "evt_field": "is_physical" - }, - } + "multiplicity": 2, + "index_field": "timestamp", + "skimmed_fields": { + "timestamp":{ + "evt_field": "timestamp" + }, + "is_muon_rejected":{ + "evt_field": "is_muon_rejected" + }, + "multiplicity":{ + "evt_field": "multiplicity" + }, + "energy":{ + "evt_field": "energy", + "missing_value": "np.nan" + }, + "energy_id":{ + "evt_field": "energy_id", + "missing_value": 0 + }, + "global_fields":{ + "energy_sum":{ + "aggregation_mode": "sum", + "evt_field": "energy" + }, + "is_all_physical":{ + "aggregation_mode": "all", + "evt_field": "is_physical" + }, } + } } wo_mode writing mode. - - ``write_safe`` or ``w``: only proceed with writing if the file does not already exists. + + - ``write_safe`` or ``w``: only proceed with writing if the file does + not already exists. - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. + group - lh5 root group name of the evt tier + LH5 root group name of the evt tier. skim_format - data format of the skimmed output (hdf or parquet) + data format of the skimmed output (``hdf`` or ``parquet``). """ - log = logging.getLogger(__name__) log.info("Starting skimming") log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") From b4a422b5506cd286fc7642d762bbf9f71211798e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 16:59:05 +0100 Subject: [PATCH 122/191] Type hints cosmetics --- src/pygama/evt/build_evt.py | 89 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index d36255be8..b42a88ef5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -15,6 +15,7 @@ import numpy as np from lgdo import Array, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store +from numpy.typing import NDArray log = logging.getLogger(__name__) @@ -49,7 +50,7 @@ def evaluate_expression( nrows: int, para: dict = None, qry: str = None, - defv=np.nan, + defv: bool | int | float = np.nan, sorter: str = None, ) -> dict: """Evaluates the expression defined by the user across all channels @@ -305,7 +306,7 @@ def find_parameters( f_hit: str, f_dsp: str, ch: str, - idx_ch: np.ndarray, + idx_ch: NDArray, exprl: list, ) -> dict: """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` @@ -332,7 +333,7 @@ def find_parameters( return dsp_dic | var -def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> dict: +def load_vars_to_nda(f: str, group: str, exprl: list, idx: NDArray = None) -> dict: """Maps parameter expressions to parameters if found in `f`. Blows up :class:`.VectorOfVectors` to :class:`.ArrayOfEqualSizedArrays`. 
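
A minimal sketch, not part of the patch, of the annotation style this commit
adopts: bare ``np.ndarray`` hints become ``numpy.typing.NDArray``. The function
and variable names below are hypothetical.

.. code-block:: python

    from __future__ import annotations

    import numpy as np
    from numpy.typing import NDArray

    def idx_for_channel(ids: NDArray, idx: NDArray, rawid: int) -> NDArray:
        # rows of one channel's hit/dsp tables that participate in the events,
        # mirroring the `idx[ids == int(ch[2:])]` selection used in this module
        return idx[ids == rawid]
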
@@ -386,7 +387,7 @@ def load_vars_to_nda(f: str, group: str, exprl: list, idx: np.ndarray = None) -> def get_data_at_channel( ch: str, - idx_ch: np.ndarray, + idx_ch: NDArray, expr: str, exprl: list, var_ph: dict, @@ -451,10 +452,10 @@ def get_data_at_channel( def get_mask_from_query( - qry: str | np.ndarray, + qry: str | NDArray, length: int, ch: str, - idx_ch: np.ndarray, + idx_ch: NDArray, f_hit: str, f_dsp: str, ) -> np.ndarray: @@ -497,19 +498,19 @@ def get_mask_from_query( def evaluate_to_first( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, sorter: tuple, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates across channels by returning the expression of the channel with smallest value of `sorter`. @@ -591,19 +592,19 @@ def evaluate_to_first( def evaluate_to_last( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, sorter: tuple, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates across channels by returning the expression of the channel with largest value of `sorter`. @@ -682,18 +683,18 @@ def evaluate_to_last( def evaluate_to_tot( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by summation across channels. @@ -758,18 +759,18 @@ def evaluate_to_tot( def evaluate_to_any( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by logical or operation across channels. If the expression evaluates to a non boolean value it is casted to boolean. @@ -835,18 +836,18 @@ def evaluate_to_any( def evaluate_to_all( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by logical and operation across channels. If the expression evaluates to a non boolean value it is casted to boolean. @@ -912,8 +913,8 @@ def evaluate_to_all( def evaluate_at_channel( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns_rm: list, @@ -921,7 +922,7 @@ def evaluate_at_channel( exprl: list, ch_comp: Array, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Aggregates by evaluating the expression at a given channel. 
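
A hedged illustration of the query-mask pattern typed above: the query string
evaluates to a boolean array of the same length as the channel data and gates
which hits enter the aggregation, with the default value filling the rest. All
values are invented.

.. code-block:: python

    import numpy as np

    energy = np.array([12.0, 30.5, 27.1])   # stand-in for hit.cuspEmax_ctc_cal
    limarr = energy > 25                    # mask obtained from a query string
    out = np.where(limarr, energy, np.nan)  # -> [nan, 30.5, 27.1]
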
@@ -977,8 +978,8 @@ def evaluate_at_channel( def evaluate_at_channel_vov( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, expr: str, @@ -986,7 +987,7 @@ def evaluate_at_channel_vov( ch_comp: VectorOfVectors, chns_rm: list, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, ) -> dict: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. @@ -1052,18 +1053,18 @@ def evaluate_at_channel_vov( def evaluate_to_aoesa( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, missv=np.nan, ) -> np.ndarray: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated @@ -1135,18 +1136,18 @@ def evaluate_to_aoesa( def evaluate_to_vector( - idx: np.ndarray, - ids: np.ndarray, + idx: NDArray, + ids: NDArray, f_hit: str, f_dsp: str, chns: list, chns_rm: list, expr: str, exprl: list, - qry: str | np.ndarray, + qry: str | NDArray, nrows: int, var_ph: dict = None, - defv=np.nan, + defv: bool | int | float = np.nan, sorter: str = None, ) -> dict: """Aggregates by returning a :class:`.VectorOfVector` of evaluated @@ -1287,13 +1288,13 @@ def build_evt( "energy_id":{ "channels": "geds_on", "aggregation_mode": "gather", - "query": "hit.cuspEmax_ctc_cal>25", + "query": "hit.cuspEmax_ctc_cal > 25", "expression": "tcm.array_id", "sort": "ascend_by:dsp.tp_0_est" }, "energy":{ "aggregation_mode": "keep_at:evt.energy_id", - "expression": "hit.cuspEmax_ctc_cal>25" + "expression": "hit.cuspEmax_ctc_cal > 25" } "is_muon_rejected":{ "channels": "muon", @@ -1303,7 +1304,7 @@ def build_evt( "initial": false }, "multiplicity":{ - "channels": ["geds_on","geds_no_psd","geds_ac"], + "channels": ["geds_on", "geds_no_psd", "geds_ac"], "aggregation_mode": "sum", "expression": "hit.cuspEmax_ctc_cal > a", "parameters": {"a":25}, @@ -1316,7 +1317,7 @@ def build_evt( "lar_energy":{ "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)" + "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)" }, } } From c4184f8ebe39d9771e4ddd1383f452856e7ee460 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 17:22:18 +0100 Subject: [PATCH 123/191] Replace another deprecated call to load_nda --- src/pygama/evt/build_evt.py | 10 +--------- src/pygama/skm/build_skm.py | 4 +--- tests/evt/test_build_evt.py | 2 +- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index b42a88ef5..37e360bb0 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1378,13 +1378,7 @@ def build_evt( elif isinstance(v, list): chns[k] = [e for e in v] - nrows = len( - lh5.load_nda(f_tcm, ["cumulative_length"], tcm_group)["cumulative_length"] - ) - # nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) - log.info( - f"Applying {len(tbl_cfg['operations'].keys())} operations to key {f_tcm.split('-')[-2]}" - ) + nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) # Define temporary file f_evt_tmp = f"{os.path.dirname(f_evt)}/{os.path.basename(f_evt).split('.')[0]}_tmp{random.randrange(9999):04d}.lh5" @@ -1499,8 +1493,6 @@ def build_evt( os.remove(f_evt_tmp) - log.info("Done") - def skim_evt( f_evt: str, 
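
A sketch of the call introduced above, with an invented file name:
``LH5Store.read_n_rows`` queries the row count of the
``hardware_tcm_1/cumulative_length`` dataset directly, instead of loading the
whole array through the deprecated ``lh5.load_nda``.

.. code-block:: python

    from lgdo.lh5 import LH5Store

    store = LH5Store()
    nrows = store.read_n_rows("hardware_tcm_1/cumulative_length", "some_tcm_file.lh5")
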
diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 8bad1050e..98d02a033 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -98,8 +98,8 @@ def build_skm( data format of the skimmed output (``hdf`` or ``parquet``). """ log = logging.getLogger(__name__) - log.info("Starting skimming") log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") + tbl_cfg = skm_conf if not isinstance(tbl_cfg, (str, dict)): raise TypeError() @@ -276,5 +276,3 @@ def build_skm( df.to_parquet(f_skm, append=True) else: raise ValueError(f"wo_mode {wo_mode} not valid.") - - log.info("done") diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 94e4dfb58..e3e378ab6 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -142,7 +142,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) f_config = f"{config_dir}/basic-evt-config.json" - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): build_evt(f_dsp, f_tcm, f_hit, outfile, f_config) with pytest.raises(KeyError): From ad906ffc8d0799863a567e3abebd2bbee709cb65 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 11 Jan 2024 17:30:56 +0100 Subject: [PATCH 124/191] Rename evt.modules.legend.legend_meta to evt.modules.legend.metadata --- src/pygama/evt/modules/legend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py index f12b81e6e..f2f8137ef 100644 --- a/src/pygama/evt/modules/legend.py +++ b/src/pygama/evt/modules/legend.py @@ -4,7 +4,7 @@ from importlib import import_module -def legend_meta(params: dict) -> list: +def metadata(params: dict) -> list: # only import legend meta data when needed. # LEGEND collaborators can use the meta keyword # While for users w/o access to the LEGEND meta data this is still working From 7fd3a93bca8b760c2a4885633b5ef7e559dc4eb5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Jan 2024 18:06:55 +0100 Subject: [PATCH 125/191] keep evt tier in memory instead of io after each operation --- src/pygama/evt/build_evt.py | 90 ++++++++++--------------------------- 1 file changed, 24 insertions(+), 66 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 37e360bb0..1f845cb5b 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -8,12 +8,11 @@ import json import logging import os -import random import re from importlib import import_module import numpy as np -from lgdo import Array, VectorOfVectors, lh5 +from lgdo import Array, Table, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store from numpy.typing import NDArray @@ -40,7 +39,6 @@ def num_and_pars(value: str, par_dic: dict): def evaluate_expression( f_tcm: str, - f_evt: str, f_hit: str, f_dsp: str, chns: list, @@ -48,6 +46,7 @@ def evaluate_expression( mode: str, expr: str, nrows: int, + table: Table = None, para: dict = None, qry: str = None, defv: bool | int | float = np.nan, @@ -60,8 +59,6 @@ def evaluate_expression( ---------- f_tcm path to `tcm` tier file. - f_evt - path to `evt` tier file. f_hit path to `hit` tier file. f_dsp @@ -98,6 +95,8 @@ def evaluate_expression( matters), or from the ``parameters`` field can be used. nrows number of rows to be processed. + table + table of 'evt' tier data. para dictionary of parameters defined in the ``parameters`` field in the configuration dictionary. 
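
A minimal sketch, with invented field names, of the in-memory pattern this
commit introduces: evt-level results accumulate in an lgdo ``Table``, and
follow-up operations are evaluated against it rather than against a temporary
file on disk.

.. code-block:: python

    import numpy as np
    from lgdo import Array, Table

    table = Table(size=3)
    table.add_field("energy", Array(np.array([100.0, 250.0, 50.0])))
    # later operations can reference fields already held in memory
    table.add_field("is_high_e", table.eval("energy > 75", {}))
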
@@ -113,8 +112,8 @@ def evaluate_expression( # find parameters in evt file or in parameters exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} - if os.path.exists(f_evt): - var_ph = load_vars_to_nda(f_evt, "", exprl) + if table: + var_ph = var_ph | table if para: var_ph = var_ph | para @@ -122,9 +121,7 @@ def evaluate_expression( # evaluate expression func, params = expr.split("(") params = ( - params.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_") + params.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", "") ) params = [f_hit, f_dsp, f_tcm, [x for x in chns if x not in chns_rm]] + [ num_and_pars(e, var_ph) for e in params[:-1].split(",") @@ -144,11 +141,8 @@ def evaluate_expression( raise ValueError("Query can't be a mix of evt tier and lower tiers.") # if it is an evt query we can evaluate it directly here - if os.path.exists(f_evt) and "evt." in qry: - var_qry = load_vars_to_nda( - f_evt, "", re.findall(r"(evt).([a-zA-Z_$][\w$]*)", qry) - ) - qry_mask = eval(qry.replace("evt.", "evt_"), var_qry) + if table and "evt." in qry: + qry_mask = eval(qry.replace("evt.", ""), table) # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") @@ -156,13 +150,12 @@ def evaluate_expression( # switch through modes if ( - os.path.exists(f_evt) + table and "keep_at:" == mode[:8] and "evt." == mode[8:][:4] - and mode[8:].split(".")[-1] - in [e.split("/")[-1] for e in lh5.ls(f_evt, "/evt/")] + and mode[8:].split(".")[-1] in table.keys() ): - ch_comp, _ = store.read(mode[8:].replace(".", "/"), f_evt) + ch_comp = table[mode[8:].replace("evt.", "")] if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -1380,44 +1373,18 @@ def build_evt( nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) - # Define temporary file - f_evt_tmp = f"{os.path.dirname(f_evt)}/{os.path.basename(f_evt).split('.')[0]}_tmp{random.randrange(9999):04d}.lh5" + table = Table(size=nrows) for k, v in tbl_cfg["operations"].items(): log.debug("Processing field" + k) # if mode not defined in operation, it can only be an operation on the evt level. 
if "aggregation_mode" not in v.keys(): - exprl = re.findall(r"(evt).([a-zA-Z_$][\w$]*)", v["expression"]) var = {} - if os.path.exists(f_evt_tmp): - var = load_vars_to_nda(f_evt_tmp, "", exprl) - if "parameters" in v.keys(): var = var | v["parameters"] - res = eval(v["expression"].replace("evt.", "evt_"), var) - - # now check what dimension we have after the evaluation - if len(res.shape) == 1: - res = Array(res) - elif len(res.shape) == 2: - res = VectorOfVectors( - flattened_data=res.flatten()[~np.isnan(res.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(res), axis=1) - ), - ) - else: - raise NotImplementedError( - f"Currently only 2d formats are supported, the evaluated array has the dimension {res.shape}" - ) - - store.write( - res, - group + k, - f_evt_tmp, - wo_mode=wo_mode, - ) + res = table.eval(v["expression"].replace("evt.", ""), var) + table.add_field(k, res) # Else we build the event entry else: @@ -1452,7 +1419,6 @@ def build_evt( result = evaluate_expression( f_tcm, - f_evt_tmp, f_hit, f_dsp, chns_e, @@ -1460,6 +1426,7 @@ def build_evt( v["aggregation_mode"], v["expression"], nrows, + table, pars, qry, defaultv, @@ -1469,30 +1436,21 @@ def build_evt( obj = result["values"] if isinstance(obj, np.ndarray): obj = Array(result["values"]) - store.write( - obj, - group + k, - f_evt_tmp, - wo_mode=wo_mode, - ) - # write output fields into f_evt and delete temporary file + table.add_field(k, obj) + + # write output fields into f_evt if "outputs" in tbl_cfg.keys(): if len(tbl_cfg["outputs"]) < 1: log.warning("No output fields specified, no file will be written.") - for fld in tbl_cfg["outputs"]: - obj, _ = store.read(group + fld, f_evt_tmp) - store.write( - obj, - group + fld, - f_evt, - wo_mode=wo_mode, - ) + else: + clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] + for fld in clms_to_remove: + table.remove_field(fld, True) + store.write(obj=table, name=group, lh5_file=f_evt, wo_mode=wo_mode) else: log.warning("No output fields specified, no file will be written.") - os.remove(f_evt_tmp) - def skim_evt( f_evt: str, From 17e0fb227d866e2b4bb30f150b41d4ef2576ff7e Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Jan 2024 18:36:31 +0100 Subject: [PATCH 126/191] fix deprecated syntax --- tests/evt/test_build_evt.py | 71 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index e3e378ab6..939b1263d 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -1,14 +1,15 @@ import os from pathlib import Path -import lgdo.lh5_store as store import numpy as np import pytest -from lgdo import Array, VectorOfVectors, load_nda, ls +from lgdo import Array, VectorOfVectors, lh5 +from lgdo.lh5 import LH5Store from pygama.evt import build_evt, skim_evt config_dir = Path(__file__).parent / "configs" +store = LH5Store() def test_basics(lgnd_test_data, tmptestdir): @@ -28,10 +29,11 @@ def test_basics(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 10 - nda = load_nda( - outfile, ["energy", "energy_aux", "energy_sum", "multiplicity"], "/evt/" - ) + assert len(lh5.ls(outfile, "/evt/")) == 10 + nda = { + e: store.read(f"/evt/{e}", outfile)[0].view_as("np") + for e in ["energy", "energy_aux", "energy_sum", "multiplicity"] + } assert ( nda["energy"][nda["multiplicity"] == 1] == nda["energy_aux"][nda["multiplicity"] == 1] @@ -62,12 +64,11 @@ def 
test_lar_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 10 - nda = load_nda( - outfile, - ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"], - "/evt/", - ) + assert len(lh5.ls(outfile, "/evt/")) == 10 + nda = { + e: store.read(f"/evt/{e}", outfile)[0].view_as("np") + for e in ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"] + } assert np.max(nda["lar_multiplicity"]) <= 3 assert np.max(nda["lar_multiplicity_dplms"]) <= 3 assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all() @@ -89,12 +90,11 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 10 - nda = load_nda( - outfile, - ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"], - "/evt/", - ) + assert len(lh5.ls(outfile, "/evt/")) == 10 + nda = { + e: store.read(f"/evt/{e}", outfile)[0].view_as("np") + for e in ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"] + } assert np.max(nda["lar_multiplicity"]) <= 3 assert np.max(nda["lar_multiplicity_dplms"]) <= 3 @@ -115,14 +115,13 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 9 - lstore = store.LH5Store() - vov_ene, _ = lstore.read("/evt/energy", outfile) - vov_aoe, _ = lstore.read("/evt/aoe", outfile) - arr_ac, _ = lstore.read("/evt/multiplicity", outfile) - vov_aoeene, _ = lstore.read("/evt/energy_times_aoe", outfile) - vov_eneac, _ = lstore.read("/evt/energy_times_multiplicity", outfile) - arr_ac2, _ = lstore.read("/evt/multiplicity_squared", outfile) + assert len(lh5.ls(outfile, "/evt/")) == 9 + vov_ene, _ = store.read("/evt/energy", outfile) + vov_aoe, _ = store.read("/evt/aoe", outfile) + arr_ac, _ = store.read("/evt/multiplicity", outfile) + vov_aoeene, _ = store.read("/evt/energy_times_aoe", outfile) + vov_eneac, _ = store.read("/evt/energy_times_multiplicity", outfile) + arr_ac2, _ = store.read("/evt/multiplicity_squared", outfile) assert isinstance(vov_ene, VectorOfVectors) assert isinstance(vov_aoe, VectorOfVectors) assert isinstance(arr_ac, Array) @@ -191,7 +190,7 @@ def test_query(lgnd_test_data, tmptestdir): group="/evt/", tcm_group="hardware_tcm_1", ) - assert len(ls(outfile, "/evt/")) == 12 + assert len(lh5.ls(outfile, "/evt/")) == 12 def test_vector_sort(lgnd_test_data, tmptestdir): @@ -234,13 +233,12 @@ def test_vector_sort(lgnd_test_data, tmptestdir): build_evt(f_tcm, f_dsp, f_hit, outfile, conf) assert os.path.exists(outfile) - assert len(ls(outfile, "/evt/")) == 4 - lstore = store.LH5Store() - vov_t0, _ = lstore.read("/evt/t0_acend", outfile) - nda_t0 = vov_t0.to_aoesa().nda + assert len(lh5.ls(outfile, "/evt/")) == 4 + vov_t0, _ = store.read("/evt/t0_acend", outfile) + nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all() - vov_t0, _ = lstore.read("/evt/t0_decend", outfile) - nda_t0 = vov_t0.to_aoesa().nda + vov_t0, _ = store.read("/evt/t0_decend", outfile) + nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() @@ -255,17 +253,16 @@ def test_skimming(lgnd_test_data, tmptestdir): f_config = f"{config_dir}/vov-test-evt-config.json" build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) - lstore = store.LH5Store() - ac = lstore.read("/evt/multiplicity", outfile)[0].nda + ac = store.read("/evt/multiplicity", outfile)[0].view_as("np") ac = len(ac[ac == 3]) outfile_skm = 
f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") - assert ac == len(lstore.read("/evt/energy", outfile_skm)[0].to_aoesa().nda) + assert ac == len(store.read("/evt/energy", outfile_skm)[0].to_aoesa().view_as("np")) skim_evt(outfile, "multiplicity == 3", None, None, "o") - assert ac == len(lstore.read("/evt/energy", outfile)[0].to_aoesa().nda) + assert ac == len(store.read("/evt/energy", outfile)[0].to_aoesa().view_as("np")) with pytest.raises(ValueError): skim_evt(outfile, "multiplicity == 3", None, None, "bla") From 67a34930642a9f4bf078bd98dd2c4d63791b513a Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Jan 2024 23:59:27 +0100 Subject: [PATCH 127/191] add awkward evaluation of VoVs --- src/pygama/evt/build_evt.py | 550 ++++++++-------------------------- src/pygama/evt/modules/spm.py | 32 +- 2 files changed, 150 insertions(+), 432 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 1f845cb5b..0e30c175c 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -11,8 +11,9 @@ import re from importlib import import_module +import awkward as ak import numpy as np -from lgdo import Array, Table, VectorOfVectors, lh5 +from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store from numpy.typing import NDArray @@ -51,7 +52,7 @@ def evaluate_expression( qry: str = None, defv: bool | int | float = np.nan, sorter: str = None, -) -> dict: +) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: """Evaluates the expression defined by the user across all channels according to the mode. @@ -130,8 +131,7 @@ def evaluate_expression( # load function dynamically p, m = func.rsplit(".", 1) met = getattr(import_module(p, package=__package__), m) - out = met(*params) - return {"values": out} + return met(*params) else: # check if query is either on channel basis or evt basis (and not a mix) @@ -187,35 +187,13 @@ def evaluate_expression( type(ch_comp) + " not supported (only Array and VectorOfVectors are supported)" ) - - elif "first_at:" in mode: + elif "first_at:" in mode or "last_at:" in mode: sorter = tuple( re.findall( r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("first_at:")[-1] )[0] ) - return evaluate_to_first( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - sorter, - var_ph, - defv, - ) - elif "last_at:" in mode: - sorter = tuple( - re.findall( - r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("last_at:")[-1] - )[0] - ) - return evaluate_to_last( + return evaluate_to_first_or_last( idx, ids, f_hit, @@ -229,9 +207,11 @@ def evaluate_expression( sorter, var_ph, defv, + is_first=True if "first_at:" in mode else False, ) - elif "sum" == mode: - return evaluate_to_tot( + elif mode in ["sum", "any", "all"]: + return evaluate_to_scalar( + mode, idx, ids, f_hit, @@ -261,36 +241,6 @@ def evaluate_expression( defv, sorter, ) - elif "any" == mode: - return evaluate_to_any( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - ) - elif "all" == mode: - return evaluate_to_all( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - ) else: raise ValueError(mode + " not a valid mode") @@ -320,67 +270,29 @@ def find_parameters( """ # find fields in either dsp, hit - var = load_vars_to_nda(f_hit, ch, exprl, idx_ch) - dsp_dic = load_vars_to_nda(f_dsp, ch, exprl, idx_ch) - - 
return dsp_dic | var - - -def load_vars_to_nda(f: str, group: str, exprl: list, idx: NDArray = None) -> dict: - """Maps parameter expressions to parameters if found in `f`. - Blows up :class:`.VectorOfVectors` to :class:`.ArrayOfEqualSizedArrays`. - - Parameters - ---------- - f - path to a LGDO file. - group - additional group in `f`. - idx - index array of entries to be read from files. - exprl - list of parameter-tuples ``(root_group, field)`` to be found in `f`. - """ + dsp_flds = [e[1] for e in exprl if e[0] == "dsp"] + hit_flds = [e[1] for e in exprl if e[0] == "hit"] store = LH5Store() - var = { - f"{e[0]}_{e[1]}": store.read( - f"{group.replace('/','')}/{e[0]}/{e[1]}", - f, - idx=idx, - )[0] - for e in exprl - if e[1] - in [x.split("/")[-1] for x in lh5.ls(f, f"{group.replace('/','')}/{e[0]}/")] - } + hit_dict, dsp_dict = {}, {} + if len(hit_flds) > 0: + hit_ak = store.read( + f"{ch.replace('/','')}/hit/", f_hit, field_mask=hit_flds, idx=idx_ch + )[0].view_as("ak") + hit_dict = dict(zip(["hit_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak))) + if len(dsp_flds) > 0: + dsp_ak = store.read( + f"{ch.replace('/','')}/dsp/", f_dsp, field_mask=dsp_flds, idx=idx_ch + )[0].view_as("ak") + dsp_dict = dict(zip(["dsp_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak))) - # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) - arr_keys = [] - for key, value in var.items(): - if isinstance(value, VectorOfVectors): - var[key] = value.to_aoesa().nda - elif isinstance(value, Array): - var[key] = value.nda - if var[key].ndim > 2: - raise ValueError("Dim > 2 not supported") - if var[key].ndim == 1: - arr_keys.append(key) - else: - raise ValueError(f"{type(value)} not supported") - - # now we also need to set dimensions if we have an expression - # consisting of a mix of VoV and Arrays - if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): - for key in arr_keys: - var[key] = var[key][:, None] - - log.debug(f"Found parameters {var.keys()}") - return var + return hit_dict | dsp_dict def get_data_at_channel( ch: str, - idx_ch: NDArray, + ids: NDArray, + idx: NDArray, expr: str, exprl: list, var_ph: dict, @@ -396,8 +308,10 @@ def get_data_at_channel( ---------- ch "rawid" of channel to be evaluated. - idx_ch - array of indices to be evaluated. + idx + `tcm` index array. + ids + `tcm` id array. expr expression to be evaluated. exprl @@ -417,10 +331,15 @@ def get_data_at_channel( default value. 
""" + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] + if not is_evaluated: res = np.full(outsize, defv, dtype=type(defv)) elif "tcm.array_id" == expr: res = np.full(outsize, int(ch[2:]), dtype=int) + elif "tcm.index" == expr: + res = np.where(ids == int(ch[2:]))[0] else: var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) @@ -436,10 +355,20 @@ def get_data_at_channel( var, ) - # if it is not a nparray it could be a single value - # expand accordingly - if not isinstance(res, np.ndarray): - res = np.full(outsize, res, dtype=type(res)) + # in case the expression evaluates to a single value blow it up + if (not hasattr(res, "__len__")) or (isinstance(res, str)): + return np.full(outsize, res) + + # the resulting arrays need to be 1D from the operation, + # this can only change once we support larger than two dimensional LGDOs + # ak.to_numpy() raises error if array not regular + res = ak.to_numpy(res, allow_missing=False) + + # in this method only 1D values are allowed + if res.ndim > 1: + raise ValueError( + f"expression '{expr}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) return res @@ -448,7 +377,8 @@ def get_mask_from_query( qry: str | NDArray, length: int, ch: str, - idx_ch: NDArray, + ids: NDArray, + idx: NDArray, f_hit: str, f_dsp: str, ) -> np.ndarray: @@ -462,13 +392,17 @@ def get_mask_from_query( length of the return mask. ch "rawid" of channel to be evaluated. - idx_ch - array of indices to be evaluated. + idx + `tcm` index array. + ids + `tcm` id array. f_hit path to `hit` tier file. f_dsp path to `dsp` tier file. """ + # get index list for this channel to be loaded + idx_ch = idx[ids == int(ch[2:])] # get sub evt based query condition if needed if isinstance(qry, str): @@ -476,6 +410,16 @@ def get_mask_from_query( qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + # in case the expression evaluates to a single value blow it up + if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): + return np.full(len(idx_ch), limarr) + + limarr = ak.to_numpy(limarr, allow_missing=False) + if limarr.ndim > 1: + raise ValueError( + f"query '{qry}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) + # or forward the array elif isinstance(qry, np.ndarray): limarr = qry @@ -484,13 +428,14 @@ def get_mask_from_query( else: limarr = np.ones(length).astype(bool) + # explicit cast to bool if limarr.dtype != bool: limarr = limarr.astype(bool) return limarr -def evaluate_to_first( +def evaluate_to_first_or_last( idx: NDArray, ids: NDArray, f_hit: str, @@ -504,9 +449,10 @@ def evaluate_to_first( sorter: tuple, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: + is_first: bool = True, +) -> Array: """Aggregates across channels by returning the expression of the channel - with smallest value of `sorter`. + with value of `sorter`. Parameters ---------- @@ -536,11 +482,12 @@ def evaluate_to_first( dictionary of `evt` and additional parameters and their values. defv default value. 
+ is_first + defines if sorted by smallest or largest value of `sorter` """ # define dimension of output array out = np.full(nrows, defv, dtype=type(defv)) - out_chs = np.zeros(len(out), dtype=int) outt = np.zeros(len(out)) store = LH5Store() @@ -552,7 +499,8 @@ def evaluate_to_first( # evaluate at channel res = get_data_at_channel( ch, - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -564,11 +512,7 @@ def evaluate_to_first( ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == first - if ch == chns[0]: - outt[:] = np.inf + limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) # find if sorter is in hit or dsp t0 = store.read( @@ -577,105 +521,25 @@ def evaluate_to_first( idx=idx_ch, )[0].view_as("np") - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 < outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - - return {"values": out, "channels": out_chs} - - -def evaluate_to_last( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - sorter: tuple, - var_ph: dict = None, - defv: bool | int | float = np.nan, -) -> dict: - """Aggregates across channels by returning the expression of the channel - with largest value of `sorter`. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of dsp/hit/evt parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output array. - sorter - tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. 
- """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - out_chs = np.zeros(len(out), dtype=int) - outt = np.zeros(len(out)) - - store = LH5Store() - - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - # evaluate at channel - res = get_data_at_channel( - ch, - idx_ch, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - len(out), - defv, - ) + if t0.ndim > 1: + raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array") - # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + if is_first: + if ch == chns[0]: + outt[:] = np.inf - # append to out according to mode == last - # find if sorter is in hit or dsp - t0 = store.read( - f"{ch}/{sorter[0]}/{sorter[1]}", - f_hit if "hit" == sorter[0] else f_dsp, - idx=idx_ch, - )[0].view_as("np") + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - out_chs[idx_ch] = np.where((t0 > outt) & (limarr), int(ch[2:]), out_chs[idx_ch]) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + else: + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) - return {"values": out, "channels": out_chs} + return Array(nda=out) -def evaluate_to_tot( +def evaluate_to_scalar( + mode: str, idx: NDArray, ids: NDArray, f_hit: str, @@ -688,11 +552,13 @@ def evaluate_to_tot( nrows: int, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: +) -> Array: """Aggregates by summation across channels. Parameters ---------- + mode + aggregation mode. idx tcm index array. ids @@ -728,161 +594,8 @@ def evaluate_to_tot( res = get_data_at_channel( ch, - idx_ch, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - len(out), - defv, - ) - - # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == tot - if res.dtype == bool: - res = res.astype(int) - - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) - - return {"values": out} - - -def evaluate_to_any( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, -) -> dict: - """Aggregates by logical or operation across channels. If the expression - evaluates to a non boolean value it is casted to boolean. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output array. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. 
- """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - res = get_data_at_channel( - ch, - idx_ch, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - len(out), - defv, - ) - - # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == any - if res.dtype != bool: - res = res.astype(bool) - - out[idx_ch] = out[idx_ch] | (res & limarr) - - return {"values": out} - - -def evaluate_to_all( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, -) -> dict: - """Aggregates by logical and operation across channels. If the expression - evaluates to a non boolean value it is casted to boolean. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output array. - var_ph - dictionary of evt and additional parameters and their values. - defv - default value. - """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - - res = get_data_at_channel( - ch, - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -894,15 +607,23 @@ def evaluate_to_all( ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - - # append to out according to mode == all - if res.dtype != bool: - res = res.astype(bool) - - out[idx_ch] = out[idx_ch] & res & limarr + limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) - return {"values": out} + # switch through modes + if "sum" == mode: + if res.dtype == bool: + res = res.astype(int) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + if "any" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) + if "all" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr + + return Array(nda=out) def evaluate_at_channel( @@ -916,7 +637,7 @@ def evaluate_at_channel( ch_comp: Array, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: +) -> Array: """Aggregates by evaluating the expression at a given channel. 
Parameters @@ -949,12 +670,11 @@ def evaluate_at_channel( # skip default value if f"ch{ch}" not in lh5.ls(f_hit): continue - # get index list for this channel to be loaded - idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -967,7 +687,7 @@ def evaluate_at_channel( out = np.where(ch == ch_comp.nda, res, out) - return {"values": out} + return Array(nda=out) def evaluate_at_channel_vov( @@ -981,7 +701,7 @@ def evaluate_at_channel_vov( chns_rm: list, var_ph: dict = None, defv: bool | int | float = np.nan, -) -> dict: +) -> VectorOfVectors: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. @@ -1010,16 +730,16 @@ def evaluate_at_channel_vov( """ # blow up vov to aoesa - out = ch_comp.to_aoesa().nda + out = ch_comp.to_aoesa().view_as("np") chns = np.unique(out[~np.isnan(out)]).astype(int) + type_name = None for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -1042,7 +762,7 @@ def evaluate_at_channel_vov( flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type_name), cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) - return {"values": out, "channels": ch_comp} + return out def evaluate_to_aoesa( @@ -1059,7 +779,7 @@ def evaluate_to_aoesa( var_ph: dict = None, defv: bool | int | float = np.nan, missv=np.nan, -) -> np.ndarray: +) -> ArrayOfEqualSizedArrays: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated expressions of channels that fulfill a query expression. @@ -1101,12 +821,10 @@ def evaluate_to_aoesa( i = 0 for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - res = get_data_at_channel( ch, - idx_ch, + ids, + idx, expr, exprl, var_ph, @@ -1118,14 +836,14 @@ def evaluate_to_aoesa( ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) # append to out according to mode == vov out[:, i][limarr] = res[limarr] i += 1 - return out + return ArrayOfEqualSizedArrays(nda=out) def evaluate_to_vector( @@ -1142,7 +860,7 @@ def evaluate_to_vector( var_ph: dict = None, defv: bool | int | float = np.nan, sorter: str = None, -) -> dict: +) -> VectorOfVectors: """Aggregates by returning a :class:`.VectorOfVector` of evaluated expressions of channels that fulfill a query expression. 
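# A sketch of the NaN-padded gather performed below by evaluate_to_vector
# (toy arrays, not from the patch): rows are optionally re-ordered by the
# "ascend_by:<tier>.<field>" / "descend_by:..." sorter, then packed into a
# VectorOfVectors by dropping the NaN padding:
#
#     import numpy as np
#
#     out = np.array([[7.0, 3.0, np.nan], [5.0, 1.0, 2.0]])    # gathered values
#     s_val = np.array([[2.0, 1.0, np.nan], [3.0, 1.0, 2.0]])  # sorter field
#
#     out = out[np.arange(len(out))[:, None], np.argsort(s_val)]  # ascend_by
#     # rows become [3., 7., nan] and [1., 2., 5.]; NaN sorts last
#     flat = out.flatten()[~np.isnan(out.flatten())]          # -> [3., 7., 1., 2., 5.]
#     cumlen = np.cumsum(np.count_nonzero(~np.isnan(out), axis=1))  # -> [2, 5]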
@@ -1193,7 +911,7 @@ def evaluate_to_vector( var_ph, defv, np.nan, - ) + ).view_as("np") # if a sorter is given sort accordingly if sorter is not None: @@ -1209,7 +927,7 @@ def evaluate_to_vector( [tuple(fld.split("."))], None, nrows, - ) + ).view_as("np") if "ascend_by" == md: out[np.arange(len(out))[:, None], np.argsort(s_val)] @@ -1220,14 +938,12 @@ def evaluate_to_vector( "sorter values can only have 'ascend_by' or 'descend_by' prefixes" ) - # This can be smarter - # shorten to vov (FUTURE: replace with awkward) out = VectorOfVectors( flattened_data=out.flatten()[~np.isnan(out.flatten())], cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) - return {"values": out} + return out def build_evt( @@ -1417,7 +1133,7 @@ def build_evt( if "sort" in v.keys(): srter = v["sort"] - result = evaluate_expression( + obj = evaluate_expression( f_tcm, f_hit, f_dsp, @@ -1433,10 +1149,6 @@ def build_evt( srter, ) - obj = result["values"] - if isinstance(obj, np.ndarray): - obj = Array(result["values"]) - table.add_field(k, obj) # write output fields into f_evt diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 0907b3a13..a96c134c9 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -17,7 +17,7 @@ # get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -58,11 +58,11 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) sum[idx_ch] = sum[idx_ch] + chsum - return sum + return Array(nda=sum) # get LAr majority per event over all channels -def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -103,11 +103,13 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) maj[idx_ch] = maj[idx_ch] + chmaj - return maj + return Array(nda=maj) # get LAr energy per event over all channels -def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_energy_dplms( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax +) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -147,11 +149,13 @@ def get_energy_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): pes = np.where(mask, pes, 0) chsum = np.nansum(pes, axis=1) sum[idx_ch] = sum[idx_ch] + chsum - return sum + return Array(nda=sum) # get LAr majority per event over all channels -def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax): +def get_majority_dplms( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax +) -> Array: trig = trgr if isinstance(trgr, VectorOfVectors): trig = trig.to_aoesa().nda @@ -192,10 +196,12 @@ def get_majority_dplms(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax chsum = np.nansum(pes, axis=1) chmaj = np.where(chsum > lim, 1, 0) maj[idx_ch] = maj[idx_ch] + chmaj - return maj + return Array(nda=maj) -def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail): +def get_etc( + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, 
tmax, swin, trail
+) -> Array:
     # ignore stupid numpy warnings
     warnings.filterwarnings("ignore", r"All-NaN slice encountered")
     warnings.filterwarnings("ignore", r"invalid value encountered in true_divide")
@@ -267,7 +273,7 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra
             / np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)),
             np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)),
         )
-        return outi
+        return Array(nda=outi)
 
     else:
         outi = np.where(
@@ -281,10 +287,10 @@ def get_etc(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, tra
             / np.nansum(np.where((times >= tge[:, None]), pes, 0), axis=(0, 2)),
             np.nansum(pes, axis=(0, 2)),
         )
-        return outi
+        return Array(nda=outi)
 
 
-def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax):
+def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array:
     store = LH5Store()
     energy_in_pe, _ = store.read(
         f"{chs[0]}/hit/energy_in_pe",
@@ -334,4 +340,4 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax):
 
     t1d = np.nanmin(times, axis=(0, 2))
 
-    return t1d - tge
+    return Array(t1d - tge)

From 333e90227b2f039c47b20e69acf42b65c2e9ce63 Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Tue, 16 Jan 2024 11:40:42 +0100
Subject: [PATCH 128/191] allow for tcm index aggregation and LAr pulse index

---
 src/pygama/evt/modules/spm.py                 | 216 ++++++++----------
 .../module-test-t0-vov-evt-config.json        |  14 +-
 tests/evt/test_build_evt.py                   |   8 +-
 3 files changed, 110 insertions(+), 128 deletions(-)

diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py
index a96c134c9..deb2f6ca8 100644
--- a/src/pygama/evt/modules/spm.py
+++ b/src/pygama/evt/modules/spm.py
@@ -11,18 +11,19 @@
 
 import warnings
 
+import awkward as ak
 import numpy as np
 from lgdo import Array, VectorOfVectors
 from lgdo.lh5 import LH5Store
 
 
-# get LAr energy per event over all channels
-def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array:
+# get SiPM coincidence window mask
+def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray:
     trig = trgr
     if isinstance(trgr, VectorOfVectors):
-        trig = trig.to_aoesa().nda
+        trig = trig.to_aoesa().view_as("np")
     elif isinstance(trgr, Array):
-        trig = trig.nda
+        trig = trig.view_as("np")
     if isinstance(trig, np.ndarray) and trig.ndim == 2:
         trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig)
         trig = np.nanmin(trig, axis=1)
@@ -31,14 +32,70 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Arr
         trig = np.where(np.isnan(trig), tdefault, trig)
     else:
         raise ValueError(f"Can't deal with t0 of type {type(trgr)}")
+
     tmi = trig - tmin
     tma = trig + tmax
-    sum = np.zeros(len(trig))
+
+    mask = (times < tma[:, None] / 16) & (times > tmi[:, None] / 16) & (pe > lim)
+    return mask, trig
+
+
+# get LAr indices according to mask per event over all channels
+def get_masked_tcm_idx(
+    f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, get_pls_idx=False
+) -> VectorOfVectors:
     # load TCM data to define an event
     store = LH5Store()
     ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np")
     idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np")
+    arr_lst = []
+    for ch in chs:
+        idx_ch = idx[ids == int(ch[2:])]
+        energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[
+            0
+        ].view_as("np")
+        trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as(
+            "np"
+        )
+        mask, _ = get_spm_mask(
+            lim, trgr, tdefault, 
tmin, tmax, energy_in_pe, trigger_pos + ) + + if get_pls_idx: + out_idx = np.repeat( + np.arange(len(mask[0]))[:, None], repeats=len(mask), axis=1 + ).T + out_idx = np.where(mask, out_idx, np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak") + else: + out_idx = np.where(mask, np.where(ids == int(ch[2:]))[0][:, None], np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak") + + arr_lst.append(out_idx) + + return VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1)) + + +# get LAr energy per event over all channels +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: + # load TCM data to define an event + store = LH5Store() + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + sum = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -48,10 +105,8 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Arr trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( "np" ) - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) @@ -63,26 +118,11 @@ def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Arr # get LAr majority per event over all channels def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().nda - elif isinstance(trgr, Array): - trig = trig.nda - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - maj = np.zeros(len(trig)) # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + maj = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -92,11 +132,10 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> A trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( "np" ) - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) + pes = energy_in_pe pes = np.where(np.isnan(pes), 0, pes) pes = np.where(mask, pes, 0) @@ -110,26 +149,11 @@ def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> A def get_energy_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().nda - 
elif isinstance(trgr, Array): - trig = trig.nda - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - sum = np.zeros(len(trig)) # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + sum = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -139,10 +163,8 @@ def get_energy_dplms( trigger_pos_dplms = store.read( f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch )[0].view_as("np") - mask = ( - (trigger_pos_dplms < tma[:, None] / 16) - & (trigger_pos_dplms > tmi[:, None] / 16) - & (energy_in_pe_dplms > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms ) pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) @@ -156,26 +178,11 @@ def get_energy_dplms( def get_majority_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().nda - elif isinstance(trgr, Array): - trig = trig.nda - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - - elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - maj = np.zeros(len(trig)) # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + maj = np.zeros(np.max(idx) + 1) for ch in chs: # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] @@ -185,10 +192,8 @@ def get_majority_dplms( trigger_pos_dplms = store.read( f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch )[0].view_as("np") - mask = ( - (trigger_pos_dplms < tma[:, None] / 16) - & (trigger_pos_dplms > tmi[:, None] / 16) - & (energy_in_pe_dplms > lim) + mask, _ = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms ) pes = energy_in_pe_dplms pes = np.where(np.isnan(pes), 0, pes) @@ -215,23 +220,6 @@ def get_etc( pes = np.zeros([len(chs), peshape[0], peshape[1]]) times = np.zeros([len(chs), peshape[0], peshape[1]]) - tge = trgr - if isinstance(trgr, VectorOfVectors): - tge = tge.to_aoesa().nda - elif isinstance(trgr, Array): - tge = tge.nda - if isinstance(tge, np.ndarray) and tge.ndim == 2: - tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) - tge = np.nanmin(tge, axis=1) - - elif isinstance(tge, np.ndarray) and tge.ndim == 1: - tge = np.where(np.isnan(tge), tdefault, tge) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - - tmi = tge - tmin - tma = tge + tmax - # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") @@ -245,10 +233,8 @@ def get_etc( trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ 0 ].view_as("np") - mask = ( - (trigger_pos < 
tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + mask, tge = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) pe = energy_in_pe time = trigger_pos * 16 @@ -292,52 +278,30 @@ def get_etc( def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: store = LH5Store() - energy_in_pe, _ = store.read( - f"{chs[0]}/hit/energy_in_pe", - f_hit, - ) - peshape = energy_in_pe.view_as("np").shape - times = np.zeros([len(chs), peshape[0], peshape[1]]) - - tge = trgr - if isinstance(trgr, VectorOfVectors): - tge = tge.to_aoesa().nda - elif isinstance(trgr, Array): - tge = tge.nda - if isinstance(tge, np.ndarray) and tge.ndim == 2: - tge = np.where(np.isnan(tge).all(axis=1)[:, None], tdefault, tge) - tge = np.nanmin(tge, axis=1) - - elif isinstance(tge, np.ndarray) and tge.ndim == 1: - tge = np.where(np.isnan(tge), tdefault, tge) - else: - raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - - tmi = tge - tmin - tma = tge + tmax - # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + spm_tmin = np.full(np.max(idx), np.inf) for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ 0 - ].view_as("np") + ].view_as("ak") trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ 0 - ].view_as("np") - mask = ( - (trigger_pos < tma[:, None] / 16) - & (trigger_pos > tmi[:, None] / 16) - & (energy_in_pe > lim) + ].view_as("ak") + mask, tge = get_spm_mask( + lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) time = trigger_pos * 16 - time = np.where(mask, time, np.nan) - times[i][idx_ch] = time - - t1d = np.nanmin(times, axis=(0, 2)) - - return Array(t1d - tge) + time = ak.min(ak.nan_to_none(time[mask]), axis=-1) + if not time: + return Array(nda=np.zeros(len(spm_tmin))) + time = ak.fill_none(time, tdefault) + if not time: + time = ak.to_numpy(time, allow_missing=False) + spm_tmin = np.where(time < spm_tmin, time, spm_tmin) + + return Array(spm_tmin - tge) diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index f1bf09a8e..ff16ea628 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -13,7 +13,9 @@ "lar_classifier", "lar_energy_dplms", "lar_multiplicity_dplms", - "lar_time_shift" + "lar_time_shift", + "lar_tcm_index", + "lar_pulse_index" ], "operations": { "energy": { @@ -65,6 +67,16 @@ "channels": "spms_on", "aggregation_mode": "function", "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)" + }, + "lar_tcm_index": { + "channels": "spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,False)" + }, + "lar_pulse_index": { + "channels": "spms_on", + "aggregation_mode": "function", + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,True)" } } } diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 939b1263d..cfd3b92cb 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -1,6 +1,7 @@ import os from pathlib import Path +import awkward as ak import numpy as np import pytest from lgdo import Array, VectorOfVectors, lh5 @@ -90,7 +91,7 @@ def 
test_lar_t0_vov_module(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 10 + assert len(lh5.ls(outfile, "/evt/")) == 12 nda = { e: store.read(f"/evt/{e}", outfile)[0].view_as("np") for e in ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"] @@ -98,6 +99,11 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): assert np.max(nda["lar_multiplicity"]) <= 3 assert np.max(nda["lar_multiplicity_dplms"]) <= 3 + ch_idx = store.read("/evt/lar_tcm_index", outfile)[0].view_as("ak") + pls_idx = store.read("/evt/lar_pulse_index", outfile)[0].view_as("ak") + assert ak.count(ch_idx) == ak.count(pls_idx) + assert ak.all(ak.count(ch_idx, axis=-1) == ak.count(pls_idx, axis=-1)) + def test_vov(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" From bad7d461648b2d69b050279793e748a4b5a8085d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Jan 2024 17:56:49 +0100 Subject: [PATCH 129/191] small bug fixes --- src/pygama/evt/build_evt.py | 10 ++++++---- src/pygama/evt/modules/spm.py | 13 ++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 0e30c175c..cd64d6c57 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -114,7 +114,11 @@ def evaluate_expression( exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) var_ph = {} if table: - var_ph = var_ph | table + var_ph = var_ph | { + e: table[e].view_as("ak") + for e in table.keys() + if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors)) + } if para: var_ph = var_ph | para @@ -349,9 +353,7 @@ def get_data_at_channel( # evaluate expression # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) res = eval( - expr.replace("dsp.", "dsp_") - .replace("hit.", "hit_") - .replace("evt.", "evt_"), + expr.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", ""), var, ) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index deb2f6ca8..8e2a8f3e7 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -24,6 +24,17 @@ def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: trig = trig.to_aoesa().view_as("np") elif isinstance(trgr, Array): trig = trig.view_as("np") + elif isinstance(trgr, ak.Array): + if trgr.ndim == 1: + trig = ak.to_numpy(trig) + else: + trig = ak.to_numpy( + ak.fill_none( + ak.pad_none(trig, target=ak.max(ak.count(trig, axis=-1)), axis=-1), + np.nan, + ), + allow_missing=False, + ) if isinstance(trig, np.ndarray) and trig.ndim == 2: trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) trig = np.nanmin(trig, axis=1) @@ -281,7 +292,7 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - spm_tmin = np.full(np.max(idx), np.inf) + spm_tmin = np.full(np.max(idx) + 1, np.inf) for i in range(len(chs)): # get index list for this channel to be loaded idx_ch = idx[ids == int(chs[i][2:])] From 983663bfe86b9a0ec8ccc75169a8fd03288abcb2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Jan 2024 18:43:26 +0100 Subject: [PATCH 130/191] a bit of verbosity --- src/pygama/evt/build_evt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index cd64d6c57..5fdc9310e 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1165,6 +1165,11 @@ def build_evt( else: log.warning("No output fields specified, no file will be written.") + key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0) + log.info( + f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" + ) + def skim_evt( f_evt: str, From e5bf02486e3465779685b86169f59436aeb9836f Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Jan 2024 22:19:55 +0100 Subject: [PATCH 131/191] spm module enhancement --- src/pygama/evt/modules/spm.py | 34 ++++++++++++++++--- .../module-test-t0-vov-evt-config.json | 4 +-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 8e2a8f3e7..aa0e41a71 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -52,8 +52,12 @@ def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: # get LAr indices according to mask per event over all channels +# mode 0 -> return pulse indices +# mode 1 -> return tcm indices +# mode 2 -> return rawids +# mode 3 -> return tcm_idx def get_masked_tcm_idx( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, get_pls_idx=False + f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode=0 ) -> VectorOfVectors: # load TCM data to define an event store = LH5Store() @@ -73,7 +77,7 @@ def get_masked_tcm_idx( lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos ) - if get_pls_idx: + if mode == 0: out_idx = np.repeat( np.arange(len(mask[0]))[:, None], repeats=len(mask), axis=1 ).T @@ -84,8 +88,8 @@ def get_masked_tcm_idx( np.count_nonzero(~np.isnan(out_idx), 
axis=1) ), dtype=int, - ).view_as("ak") - else: + ).view_as("ak", preserve_dtype=True) + elif mode == 1: out_idx = np.where(mask, np.where(ids == int(ch[2:]))[0][:, None], np.nan) out_idx = VectorOfVectors( flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], @@ -93,7 +97,27 @@ def get_masked_tcm_idx( np.count_nonzero(~np.isnan(out_idx), axis=1) ), dtype=int, - ).view_as("ak") + ).view_as("ak", preserve_dtype=True) + elif mode == 2: + out_idx = np.where(mask, int(ch[2:]), np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak", preserve_dtype=True) + elif mode == 3: + out_idx = np.where(mask, idx_ch[:, None], np.nan) + out_idx = VectorOfVectors( + flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], + cumulative_length=np.cumsum( + np.count_nonzero(~np.isnan(out_idx), axis=1) + ), + dtype=int, + ).view_as("ak", preserve_dtype=True) + else: + raise ValueError("Unknown mode") arr_lst.append(out_idx) diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index ff16ea628..64d3dd0e3 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -71,12 +71,12 @@ "lar_tcm_index": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,False)" + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,1)" }, "lar_pulse_index": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,True)" + "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,0)" } } } From ccf73092ab5615c366bd529e882fbc7a27adebb9 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 17 Jan 2024 17:58:15 +0100 Subject: [PATCH 132/191] skm tier refactor and cleanup works --- src/pygama/evt/__init__.py | 4 +- src/pygama/evt/build_evt.py | 100 +------ src/pygama/skm/build_skm.py | 292 +++++++++------------ tests/evt/configs/vov-test-evt-config.json | 21 +- tests/evt/test_build_evt.py | 42 +-- tests/skm/configs/basic-skm-config.json | 34 +-- tests/skm/test_build_skm.py | 34 ++- 7 files changed, 204 insertions(+), 323 deletions(-) diff --git a/src/pygama/evt/__init__.py b/src/pygama/evt/__init__.py index 8bc8bf058..80b544455 100644 --- a/src/pygama/evt/__init__.py +++ b/src/pygama/evt/__init__.py @@ -2,8 +2,8 @@ Utilities for grouping hit data into events. 
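A typical call, mirroring the tests in this series (file paths and the JSON
configuration are placeholders; sketch only, positional arguments as used in
tests/evt/test_build_evt.py):

    from pygama.evt import build_evt

    # tcm/dsp/hit inputs, evt output file, then the evt configuration
    build_evt(f_tcm, f_dsp, f_hit, f_evt_out, evt_config_json)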
""" -from .build_evt import build_evt, skim_evt +from .build_evt import build_evt from .build_tcm import build_tcm from .tcm import generate_tcm_cols -__all__ = ["build_tcm", "generate_tcm_cols", "build_evt", "skim_evt"] +__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"] diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 5fdc9310e..a0cf1b5dc 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -7,7 +7,6 @@ import itertools import json import logging -import os import re from importlib import import_module @@ -941,7 +940,7 @@ def evaluate_to_vector( ) out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())], + flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type(defv)), cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), ) @@ -1130,8 +1129,12 @@ def build_evt( pars = v["parameters"] if "query" in v.keys(): qry = v["query"] - if "initial" in v.keys() and not v["initial"] == "np.nan": + if "initial" in v.keys(): defaultv = v["initial"] + if isinstance(defaultv, str) and ( + defaultv in ["np.nan", "np.inf", "-np.inf"] + ): + defaultv = eval(defaultv) if "sort" in v.keys(): srter = v["sort"] @@ -1169,94 +1172,3 @@ def build_evt( log.info( f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" ) - - -def skim_evt( - f_evt: str, - expression: str, - params: dict = None, - f_out: str = None, - wo_mode="n", - evt_group="/evt/", -) -> None: - """Skims events from an `evt` file which are fulfilling the expression, - discards all other events. - - Parameters - ---------- - f_evt - input LH5 file of the `evt` level. - expression - skimming expression. Can contain variables from event file or from the - `params` dictionary. - f_out - output LH5 file. Can be ``None`` if `wo_mode` is set to overwrite `f_evt`. - wo_mode - Write mode: ``o``/``overwrite`` overwrites f_evt. ``n``/``new`` writes - to a new file specified in `f_out`. - evt_group - LH5 root group of the `evt` file. - """ - - if wo_mode not in ["o", "overwrite", "n", "new"]: - raise ValueError( - wo_mode - + " is a invalid writing mode. Valid options are: 'o', 'overwrite','n','new'" - ) - store = LH5Store() - fields = lh5.ls(f_evt, evt_group) - nrows = store.read_n_rows(fields[0], f_evt) - # load fields in expression - exprl = re.findall(r"[a-zA-Z_$][\w$]*", expression) - var = {} - - flds = [ - e.split("/")[-1] for e in lh5.ls(f_evt, evt_group) if e.split("/")[-1] in exprl - ] - var = {e: store.read(evt_group + e, f_evt)[0] for e in flds} - - # to make any operations to VoVs we have to blow it up to a table (future change to more intelligant way) - arr_keys = [] - for key, value in var.items(): - if isinstance(value, VectorOfVectors): - var[key] = value.to_aoesa().nda - elif isinstance(value, Array): - var[key] = value.nda - arr_keys.append(key) - - # now we also need to set dimensions if we have an expression - # consisting of a mix of VoV and Arrays - if len(arr_keys) > 0 and not set(arr_keys) == set(var.keys()): - for key in arr_keys: - var[key] = var[key][:, None] - - if params is not None: - var = var | params - res = eval(expression, var) - - if res.shape != (nrows,): - raise ValueError( - "The expression must result to 1D with length = event number. 
" - f"Current shape is {res.shape}" - ) - - res = res.astype(bool) - idx_list = np.arange(nrows, dtype=int)[res] - - of = f_out - if wo_mode in ["o", "overwrite"]: - of = f_evt - of_tmp = of.replace(of.split("/")[-1], ".tmp_" + of.split("/")[-1]) - - for fld in fields: - ob, _ = store.read(fld, f_evt, idx=idx_list) - store.write( - ob, - fld, - of_tmp, - wo_mode="o", - ) - - if os.path.exists(of): - os.remove(of) - os.rename(of_tmp, of) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 98d02a033..28e07bbee 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -1,6 +1,6 @@ """ This module implements routines to build the `skm` tier, consisting of skimmed -data from the `evt` tier. +data from lower tiers. """ from __future__ import annotations @@ -10,29 +10,36 @@ import os import awkward as ak -import h5py import numpy as np import pandas as pd -from lgdo import VectorOfVectors, lh5 +from lgdo import Array from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) def build_skm( - f_evt: str | list, + f_evt: str, + f_hit: str, + f_dsp: str, + f_tcm: str, f_skm: str, skm_conf: dict | str, wo_mode="w", - group: str = "/evt/", skim_format: str = "parquet", ) -> None: - """Builds a skimmed file from a (set) of evt tier file(s). + """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). Parameters ---------- f_evt - list/path of `evt` file(s). + path of `evt` file. + f_hit + path of `hit` file. + f_dsp + path of `dsp` file. + f_tcm + path of `tcm` file. f_skm name of the `skm` output file. skm_conf @@ -40,11 +47,12 @@ def build_skm( - ``multiplicity`` defines up to which row length :class:`.VectorOfVector` fields should be kept. - - ``index_field`` - - ``skimmed_fields`` are forwarded from the evt tier and clipped/padded - according to ``missing_value`` if needed. - - ``global_fields`` defines an operation to reduce the dimension of - :class:`.VectorOfVector` event fields. + - ``index_field`` sets the index of the output table. If not given + the index are set es increasing integers. + - ``operations`` are forwarded from lower tiers and clipped/padded + according to ``missing_value`` if needed. If the forwarded field + is not an evt tier, ``tcm_idx`` must be passed that specifies the + value to pick across channels. For example: @@ -53,35 +61,24 @@ def build_skm( { "multiplicity": 2, "index_field": "timestamp", - "skimmed_fields": { - "timestamp":{ - "evt_field": "timestamp" - }, - "is_muon_rejected":{ - "evt_field": "is_muon_rejected" - }, - "multiplicity":{ - "evt_field": "multiplicity" - }, - "energy":{ - "evt_field": "energy", - "missing_value": "np.nan" - }, - "energy_id":{ - "evt_field": "energy_id", - "missing_value": 0 - }, - "global_fields":{ - "energy_sum":{ - "aggregation_mode": "sum", - "evt_field": "energy" - }, - "is_all_physical":{ - "aggregation_mode": "all", - "evt_field": "is_physical" - }, + "operations": { + "timestamp":{ + "forward_field": "evt.timestamp" + }, + "multiplicity":{ + "forward_field": "evt.multiplicity" + }, + "energy":{ + "forward_field": "hit.cuspEmax_ctc_cal", + "missing_value": "np.nan", + "tcm_idx": "evt.energy_idx" + }, + "energy_id":{ + "forward_field": "tcm.array_id", + "missing_value": 0, + "tcm_idx": "evt.energy_idx" + } } - } } wo_mode @@ -92,11 +89,10 @@ def build_skm( - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. - group - LH5 root group name of the evt tier. skim_format data format of the skimmed output (``hdf`` or ``parquet``). 
""" + f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} log = logging.getLogger(__name__) log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files") @@ -107,140 +103,92 @@ def build_skm( with open(tbl_cfg) as f: tbl_cfg = json.load(f) - flds, flds_vov, flds_arr, multi = None, None, None, None - if "skimmed_fields" in tbl_cfg.keys(): - flds = tbl_cfg["skimmed_fields"].keys() - evt_flds = [(e, tbl_cfg["skimmed_fields"][e]["evt_field"]) for e in flds] - f = h5py.File(f_evt[0] if isinstance(f_evt, list) else f_evt, "r") - flds_vov = [ - x - for x in evt_flds - if x[1] - in [ - e.split("/")[-1] - for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) - if "array<1>{array<1>{" in f[e].attrs.get("datatype") - ] - ] - flds_arr = [ - x - for x in evt_flds - if x not in flds_vov - and x[1] - in [ - e.split("/")[-1] - for e in lh5.ls(f_evt[0] if isinstance(f_evt, list) else f_evt, group) - ] - ] - - gflds = None - if "global_fields" in tbl_cfg.keys(): - gflds = list(tbl_cfg["global_fields"].keys()) - - if flds is None and gflds is None: - return - - # Check if multiplicity is given, if vector like fields are skimmed - if ( - isinstance(flds_vov, list) - and len(flds_vov) > 0 - and "multiplicity" not in tbl_cfg.keys() - ): - raise ValueError("If skiime fields are passed, multiplicity must be given") - - elif "multiplicity" in tbl_cfg.keys(): - multi = tbl_cfg["multiplicity"] - - # init pandas df - df = pd.DataFrame() - store = LH5Store() + # Check if multiplicity is given + if "multiplicity" not in tbl_cfg.keys(): + raise ValueError("multiplicity field missing") - # add array like fields - if isinstance(flds_arr, list): - log.debug("Crunching array-like fields") - - _df = store.read( - group, - f_evt, - field_mask=[x[1] for x in flds_arr], - )[ - 0 - ].view_as("pd") - - _df = _df.rename(columns={y: x for x, y in flds_arr}) - df = df.join(_df, how="outer") - - # take care of vector like fields - if isinstance(flds_vov, list): - log.debug("Processing VoV-like fields") - for fld in flds_vov: - if "missing_value" not in tbl_cfg["skimmed_fields"][fld[0]].keys(): - raise ValueError( - f"({fld[0]}) is a VectorOfVector field and no missing_value is specified" - ) - vls, _ = store.read(group + fld[1], f_evt) - mv = tbl_cfg["skimmed_fields"][fld[0]]["missing_value"] - if mv in ["np.inf", "-np.inf", "np.nan"]: - mv = eval(mv) - out = vls.to_aoesa(max_len=multi, fill_val=mv).nda - nms = [fld[0] + f"_{e}" for e in range(multi)] - df = df.join(pd.DataFrame(data=out, columns=nms), how="outer") - - # ok now build global fields if requested - if isinstance(gflds, list): - log.debug("Defining global fields") - for k in gflds: - if "aggregation_mode" not in tbl_cfg["global_fields"][k].keys(): - raise ValueError(f"global {k} operation needs aggregation mode") - if "evt_field" not in tbl_cfg["global_fields"][k].keys(): - raise ValueError(f"global {k} operation needs evt_field") - mode = tbl_cfg["global_fields"][k]["aggregation_mode"] - fld = tbl_cfg["global_fields"][k]["evt_field"] - - obj, _ = store.read(group + fld, f_evt) - if not isinstance(obj, VectorOfVectors): - raise ValueError( - f"global {k} operation not possible, since {fld} is not an VectorOfVectors" - ) - - obj_ak = obj.view_as("ak") - if mode in [ - "sum", - "prod", - "nansum", - "nanprod", - "any", - "all", - "mean", - "std", - "var", - ]: - df = df.join( - pd.DataFrame( - data=getattr(ak, mode)(obj_ak, axis=-1).to_numpy( - allow_missing=False - ), - columns=[k], - ) - ) + multi = 
int(tbl_cfg["multiplicity"]) + store = LH5Store() + df = pd.DataFrame() - elif mode in ["min", "max"]: - val = getattr(ak, mode)(obj_ak, axis=-1, mask_identity=True) - if "missing_value" not in tbl_cfg["global_fields"][k].keys(): + if "operations" in tbl_cfg.keys(): + for op in tbl_cfg["operations"].keys(): + miss_val = np.nan + if "missing_value" in tbl_cfg["operations"][op].keys(): + miss_val = tbl_cfg["operations"][op]["missing_value"] + if isinstance(miss_val, str) and ( + miss_val in ["np.nan", "np.inf", "-np.inf"] + ): + miss_val = eval(miss_val) + + fw_fld = tbl_cfg["operations"][op]["forward_field"].split(".") + if fw_fld[0] not in ["evt", "hit", "dsp", "tcm"]: + raise ValueError(f"{fw_fld[0]} is not a valid tier") + + # load object if from evt tier + if fw_fld[0] == "evt": + obj = store.read(f"/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]])[ + 0 + ].view_as("ak") + + # else collect data from lower tier via tcm_idx + else: + if "tcm_idx" not in tbl_cfg["operations"][op].keys(): raise ValueError( - f"global {k} {mode} operation needs a missing value assigned" + f"{op} is an sub evt level operation. tcm_idx field must be specified" ) - mv = tbl_cfg["global_fields"][k]["missing_value"] - if mv == "np.inf": - mv = np.inf - elif mv == "-np.inf": - mv = -1 * np.inf - val = ak.fill_none(val, mv) - df = df.join( - pd.DataFrame(data=val.to_numpy(allow_missing=False), columns=[k]) - ) - else: - raise ValueError("aggregation mode not supported") + tcm_idx_fld = tbl_cfg["operations"][op]["tcm_idx"].split(".") + tcm_idx = store.read( + f"/{tcm_idx_fld[0]}/{tcm_idx_fld[1]}", f_dict[tcm_idx_fld[0]] + )[0].view_as("ak")[:, :multi] + + obj = ak.Array([[] for x in range(len(tcm_idx))]) + + # load TCM data to define an event + ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("ak") + ids = ak.unflatten(ids[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) + + idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("ak") + idx = ak.unflatten(idx[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) + + if "tcm.array_id" == tbl_cfg["operations"][op]["forward_field"]: + obj = ids + elif "tcm.array_idx" == tbl_cfg["operations"][op]["forward_field"]: + obj = idx + + else: + chns = np.unique( + ak.to_numpy(ak.flatten(ids), allow_missing=False) + ).astype(int) + + # Get the data + for ch in chns: + ch_idx = idx[ids == ch] + ct_idx = ak.count(ch_idx, axis=-1) + fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) + och, _ = store.read( + f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", + f_dict[fw_fld[0]], + idx=fl_idx, + ) + if not isinstance(och, Array): + raise ValueError( + f"{type(och)} not supported. 
Forward only Array fields" + ) + och = och.view_as("ak") + och = ak.unflatten(och, ct_idx) + obj = ak.concatenate((obj, och), axis=-1) + + # Pad, clip and numpyfy + if obj.ndim > 1: + obj = ak.pad_none(obj, multi, clip=True) + obj = ak.to_numpy(ak.fill_none(obj, miss_val)) + + nms = [op] + if obj.ndim > 1: + nms = [f"{op}_{x}" for x in range(multi)] + + df = df.join(pd.DataFrame(data=obj, columns=nms), how="outer") # Set an index column if specified if "index_field" in tbl_cfg.keys(): diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index ffdce3b31..32c2c0b59 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -6,7 +6,9 @@ "outputs": [ "timestamp", "energy", + "energy_sum", "energy_id", + "energy_idx", "aoe", "multiplicity", "is_saturated", @@ -27,11 +29,28 @@ "query": "hit.cuspEmax_ctc_cal>25", "expression": "hit.cuspEmax_ctc_cal" }, + "energy_sum": { + "channels": "geds_on", + "aggregation_mode": "sum", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "hit.cuspEmax_ctc_cal", + "initial": 0.0 + }, + "energy_idx": { + "channels": "geds_on", + "aggregation_mode": "gather", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.index", + "sort": "ascend_by:dsp.tp_0_est", + "initial": 0 + }, "energy_id": { "channels": "geds_on", "aggregation_mode": "gather", "query": "hit.cuspEmax_ctc_cal>25", - "expression": "tcm.array_id" + "expression": "tcm.array_id", + "sort": "ascend_by:dsp.tp_0_est", + "initial": 0 }, "aoe": { "aggregation_mode": "keep_at:evt.energy_id", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index cfd3b92cb..89cc24386 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -7,7 +7,7 @@ from lgdo import Array, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store -from pygama.evt import build_evt, skim_evt +from pygama.evt import build_evt config_dir = Path(__file__).parent / "configs" store = LH5Store() @@ -121,7 +121,7 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 9 + assert len(lh5.ls(outfile, "/evt/")) == 11 vov_ene, _ = store.read("/evt/energy", outfile) vov_aoe, _ = store.read("/evt/aoe", outfile) arr_ac, _ = store.read("/evt/multiplicity", outfile) @@ -136,6 +136,18 @@ def test_vov(lgnd_test_data, tmptestdir): assert isinstance(arr_ac2, Array) assert (np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all() + vov_eid = store.read("/evt/energy_id", outfile)[0].view_as("ak") + vov_eidx = store.read("/evt/energy_idx", outfile)[0].view_as("ak") + + ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[ + 0 + ].view_as("ak") + ids = ak.unflatten(ids[ak.flatten(vov_eidx)], ak.count(vov_eidx, axis=-1)) + assert ak.all(ids == vov_eid) + + arr_ene = store.read("/evt/energy_sum", outfile)[0].view_as("ak") + assert ak.all(arr_ene == ak.nansum(vov_ene.view_as("ak"), axis=-1)) + def test_graceful_crashing(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" @@ -246,29 +258,3 @@ def test_vector_sort(lgnd_test_data, tmptestdir): vov_t0, _ = store.read("/evt/t0_decend", outfile) nda_t0 = vov_t0.to_aoesa().view_as("np") assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all() - - -def test_skimming(lgnd_test_data, tmptestdir): - outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" - tcm_path = 
"lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" - if os.path.exists(outfile): - os.remove(outfile) - f_tcm = lgnd_test_data.get_path(tcm_path) - f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")) - f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")) - f_config = f"{config_dir}/vov-test-evt-config.json" - build_evt(f_tcm, f_dsp, f_hit, outfile, f_config) - - ac = store.read("/evt/multiplicity", outfile)[0].view_as("np") - ac = len(ac[ac == 3]) - - outfile_skm = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" - - skim_evt(outfile, "multiplicity == 3", None, outfile_skm, "n") - assert ac == len(store.read("/evt/energy", outfile_skm)[0].to_aoesa().view_as("np")) - - skim_evt(outfile, "multiplicity == 3", None, None, "o") - assert ac == len(store.read("/evt/energy", outfile)[0].to_aoesa().view_as("np")) - - with pytest.raises(ValueError): - skim_evt(outfile, "multiplicity == 3", None, None, "bla") diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index b1844ecb0..faf5e56cb 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -1,35 +1,25 @@ { "multiplicity": 3, "index_field": "timestamp", - "skimmed_fields": { + "operations": { "timestamp": { - "evt_field": "timestamp" + "forward_field": "evt.timestamp" + }, + "energy_sum": { + "forward_field": "evt.energy_sum" }, "multiplicity": { - "evt_field": "multiplicity" + "forward_field": "evt.multiplicity" }, "energy": { - "evt_field": "energy", - "missing_value": "np.nan" + "forward_field": "hit.cuspEmax_ctc_cal", + "missing_value": "np.nan", + "tcm_idx": "evt.energy_idx" }, "energy_id": { - "evt_field": "energy_id", - "missing_value": 0 - } - }, - "global_fields": { - "energy_sum": { - "aggregation_mode": "nansum", - "evt_field": "energy" - }, - "is_any_saturated": { - "aggregation_mode": "any", - "evt_field": "is_saturated" - }, - "max_energy": { - "aggregation_mode": "max", - "evt_field": "energy", - "missing_value": "np.inf" + "forward_field": "tcm.array_id", + "missing_value": 0, + "tcm_idx": "evt.energy_idx" } } } diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 0b2beebe4..678fe2c41 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -1,14 +1,17 @@ import os from pathlib import Path +import awkward as ak import numpy as np import pandas as pd +from lgdo.lh5 import LH5Store from pygama.evt import build_evt from pygama.skm import build_skm config_dir = Path(__file__).parent / "configs" evt_config_dir = Path(__file__).parent.parent / "evt" / "configs" +store = LH5Store() def test_basics(lgnd_test_data, tmptestdir): @@ -29,7 +32,16 @@ def test_basics(lgnd_test_data, tmptestdir): skm_conf = f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" - build_skm(outfile, skm_out, skm_conf, wo_mode="o", skim_format="hdf") + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out, + skm_conf, + wo_mode="o", + skim_format="hdf", + ) assert os.path.exists(skm_out) df = pd.read_hdf(skm_out) @@ -37,7 +49,11 @@ def test_basics(lgnd_test_data, tmptestdir): assert "energy_0" in df.keys() assert "energy_1" in df.keys() assert "energy_2" in df.keys() + assert "energy_id_0" in df.keys() + assert "energy_id_1" in df.keys() + assert "energy_id_2" 
in df.keys() assert "multiplicity" in df.keys() + assert "energy_sum" in df.keys() assert (df.multiplicity.to_numpy() <= 3).all() assert ( np.nan_to_num(df.energy_0.to_numpy()) @@ -45,6 +61,16 @@ def test_basics(lgnd_test_data, tmptestdir): + np.nan_to_num(df.energy_2.to_numpy()) == df.energy_sum.to_numpy() ).all() - assert (np.nan_to_num(df.energy_0.to_numpy()) <= df.max_energy.to_numpy()).all() - assert (np.nan_to_num(df.energy_1.to_numpy()) <= df.max_energy.to_numpy()).all() - assert (np.nan_to_num(df.energy_2.to_numpy()) <= df.max_energy.to_numpy()).all() + + vov_eid = ak.to_numpy( + ak.fill_none( + ak.pad_none( + store.read("/evt/energy_id", outfile)[0].view_as("ak"), 3, clip=True + ), + 0, + ), + allow_missing=False, + ) + assert (vov_eid[:, 0] == df.energy_id_0.to_numpy()).all() + assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all() + assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() From f262b020e1d57f4498e997dc20d3364d471efa57 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 17 Jan 2024 19:25:55 +0100 Subject: [PATCH 133/191] skm tier feature addition --- src/pygama/skm/build_skm.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 28e07bbee..aed71e1eb 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -12,7 +12,7 @@ import awkward as ak import numpy as np import pandas as pd -from lgdo import Array +from lgdo import Array, lh5 from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) @@ -47,6 +47,9 @@ def build_skm( - ``multiplicity`` defines up to which row length :class:`.VectorOfVector` fields should be kept. + - ``postfixes`` list of postfixes must be list of + ``len(multiplicity)``. If not given, numbers from 0 to + ``multiplicity -1`` are used - ``index_field`` sets the index of the output table. If not given the index are set es increasing integers. - ``operations`` are forwarded from lower tiers and clipped/padded @@ -60,6 +63,7 @@ def build_skm( { "multiplicity": 2, + "postfixes":["","aux"], "index_field": "timestamp", "operations": { "timestamp":{ @@ -166,11 +170,17 @@ def build_skm( ch_idx = idx[ids == ch] ct_idx = ak.count(ch_idx, axis=-1) fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) - och, _ = store.read( - f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", - f_dict[fw_fld[0]], - idx=fl_idx, - ) + + if f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}" not in lh5.ls( + f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/" + ): + och = Array(nda=np.full(len(fl_idx), miss_val)) + else: + och, _ = store.read( + f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", + f_dict[fw_fld[0]], + idx=fl_idx, + ) if not isinstance(och, Array): raise ValueError( f"{type(och)} not supported. 
Forward only Array fields" @@ -186,7 +196,10 @@ def build_skm( nms = [op] if obj.ndim > 1: - nms = [f"{op}_{x}" for x in range(multi)] + if "postfixes" in tbl_cfg.keys(): + nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]] + else: + nms = [f"{op}_{x}" for x in range(multi)] df = df.join(pd.DataFrame(data=obj, columns=nms), how="outer") From c087649819b2d983635026c85ba6f475a38ccf7d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 17 Jan 2024 21:51:01 +0100 Subject: [PATCH 134/191] added option to keep at index --- src/pygama/evt/build_evt.py | 35 +++++++++++++------ tests/evt/configs/basic-evt-config.json | 14 ++++++-- tests/evt/configs/module-test-evt-config.json | 2 +- .../module-test-t0-vov-evt-config.json | 2 +- tests/evt/configs/vov-test-evt-config.json | 9 +++-- tests/evt/test_build_evt.py | 20 ++++++++--- 6 files changed, 61 insertions(+), 21 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index a0cf1b5dc..b37f5a3a5 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -80,7 +80,9 @@ def evaluate_expression( - ``sum``: aggregates by summation. - ``any``: aggregates by logical or. - ``all``: aggregates by logical and. - - ``keep_at:ch_field``: aggregates according to passed ch_field + - ``keep_at_ch:ch_field``: aggregates according to passed ch_field. + - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm + index field. - ``gather``: Channels are not combined, but result saved as :class:`.VectorOfVectors`. @@ -152,13 +154,26 @@ def evaluate_expression( idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") # switch through modes - if ( - table - and "keep_at:" == mode[:8] - and "evt." == mode[8:][:4] - and mode[8:].split(".")[-1] in table.keys() - ): - ch_comp = table[mode[8:].replace("evt.", "")] + if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): + if "keep_at_ch:" == mode[:11]: + ch_comp = table[mode[11:].replace("evt.", "")] + else: + ch_comp = table[mode[12:].replace("evt.", "")] + if isinstance(ch_comp, Array): + ch_comp = Array(nda=ids[ch_comp.view_as("np")]) + elif isinstance(ch_comp, VectorOfVectors): + ch_comp = ch_comp.view_as("ak") + ch_comp = VectorOfVectors( + array=ak.unflatten( + ids[ak.flatten(ch_comp)], ak.count(ch_comp, axis=-1) + ) + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) + if isinstance(ch_comp, Array): return evaluate_at_channel( idx, @@ -1003,7 +1018,7 @@ def build_evt( "sort": "ascend_by:dsp.tp_0_est" }, "energy":{ - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "hit.cuspEmax_ctc_cal > 25" } "is_muon_rejected":{ @@ -1021,7 +1036,7 @@ def build_evt( "initial": 0 }, "t0":{ - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "dsp.tp_0_est" }, "lar_energy":{ diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 8eb23adf2..0c82c673f 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -6,6 +6,7 @@ "multiplicity", "energy", "energy_id", + "energy_idx", "energy_any_above1MeV", "energy_all_above1MeV", "energy_aux", @@ -36,6 +37,13 @@ "expression": "tcm.array_id", "initial": 0 }, + "energy_idx": { + "channels": "geds_on", + "aggregation_mode": "first_at:dsp.tp_0_est", + "query": "hit.cuspEmax_ctc_cal>25", + "expression": "tcm.index", + "initial": 
999999999999 + }, "energy_any_above1MeV": { "channels": "geds_on", "aggregation_mode": "any", @@ -63,17 +71,17 @@ "initial": 0.0 }, "is_usable_aoe": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "True", "initial": false }, "aoe": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "hit.AoE_Classifier", "initial": "np.nan" }, "is_aoe_rejected": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "~(hit.AoE_Double_Sided_Cut)", "initial": false } diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 595999d60..d0ea1bc68 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -31,7 +31,7 @@ "initial": 0 }, "t0": { - "aggregation_mode": "keep_at:evt.energy_first_id", + "aggregation_mode": "keep_at_ch:evt.energy_first_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 64d3dd0e3..d31e9717a 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -31,7 +31,7 @@ "expression": "tcm.array_id" }, "t0": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "dsp.tp_0_est", "initial": 0.0 }, diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json index 32c2c0b59..31334101e 100644 --- a/tests/evt/configs/vov-test-evt-config.json +++ b/tests/evt/configs/vov-test-evt-config.json @@ -10,6 +10,7 @@ "energy_id", "energy_idx", "aoe", + "aoe_idx", "multiplicity", "is_saturated", "energy_times_aoe", @@ -53,7 +54,11 @@ "initial": 0 }, "aoe": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", + "expression": "hit.AoE_Classifier" + }, + "aoe_idx": { + "aggregation_mode": "keep_at_idx:evt.energy_idx", "expression": "hit.AoE_Classifier" }, "multiplicity": { @@ -64,7 +69,7 @@ "initial": 0 }, "is_saturated": { - "aggregation_mode": "keep_at:evt.energy_id", + "aggregation_mode": "keep_at_ch:evt.energy_id", "expression": "hit.is_saturated" }, "energy_times_aoe": { diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 89cc24386..64ad133ed 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -30,7 +30,7 @@ def test_basics(lgnd_test_data, tmptestdir): ) assert os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 10 + assert len(lh5.ls(outfile, "/evt/")) == 11 nda = { e: store.read(f"/evt/{e}", outfile)[0].view_as("np") for e in ["energy", "energy_aux", "energy_sum", "multiplicity"] @@ -48,6 +48,16 @@ def test_basics(lgnd_test_data, tmptestdir): == nda["energy_sum"][nda["multiplicity"] == 1] ).all() + eid = store.read("/evt/energy_id", outfile)[0].view_as("np") + eidx = store.read("/evt/energy_idx", outfile)[0].view_as("np") + eidx = eidx[eidx != 999999999999] + + ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[ + 0 + ].view_as("np") + ids = ids[eidx] + assert ak.all(ids == eid[eid != 0]) + def test_lar_module(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" @@ -121,7 +131,7 @@ def test_vov(lgnd_test_data, tmptestdir): ) assert 
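The assertions above verify that the index-based and id-based encodings of the same selection agree. The same consistency check standalone, with illustrative arrays (0 and 999999999999 play the roles of the configured defaults):

    import numpy as np

    ids = np.array([101, 102, 103, 101])         # TCM array_id per flattened hit
    energy_id = np.array([102, 0, 101])          # 0 marks "no qualifying hit"
    energy_idx = np.array([1, 999999999999, 3])  # sentinel marks "no hit"

    valid = energy_idx != 999999999999
    assert (ids[energy_idx[valid]] == energy_id[energy_id != 0]).all()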
os.path.exists(outfile) - assert len(lh5.ls(outfile, "/evt/")) == 11 + assert len(lh5.ls(outfile, "/evt/")) == 12 vov_ene, _ = store.read("/evt/energy", outfile) vov_aoe, _ = store.read("/evt/aoe", outfile) arr_ac, _ = store.read("/evt/multiplicity", outfile) @@ -138,6 +148,7 @@ def test_vov(lgnd_test_data, tmptestdir): vov_eid = store.read("/evt/energy_id", outfile)[0].view_as("ak") vov_eidx = store.read("/evt/energy_idx", outfile)[0].view_as("ak") + vov_aoe_idx = store.read("/evt/aoe_idx", outfile)[0].view_as("ak") ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[ 0 @@ -147,6 +158,7 @@ def test_vov(lgnd_test_data, tmptestdir): arr_ene = store.read("/evt/energy_sum", outfile)[0].view_as("ak") assert ak.all(arr_ene == ak.nansum(vov_ene.view_as("ak"), axis=-1)) + assert ak.all(vov_aoe.view_as("ak") == vov_aoe_idx) def test_graceful_crashing(lgnd_test_data, tmptestdir): @@ -232,7 +244,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): "sort": "ascend_by:dsp.tp_0_est", }, "t0_acend": { - "aggregation_mode": "keep_at:evt.acend_id", + "aggregation_mode": "keep_at_ch:evt.acend_id", "expression": "dsp.tp_0_est", }, "decend_id": { @@ -243,7 +255,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): "sort": "descend_by:dsp.tp_0_est", }, "t0_decend": { - "aggregation_mode": "keep_at:evt.acend_id", + "aggregation_mode": "keep_at_ch:evt.acend_id", "expression": "dsp.tp_0_est", }, }, From 7af7e16a975b5e2c764c68d2fc78b2a1fe16c93f Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 18 Jan 2024 18:24:42 +0100 Subject: [PATCH 135/191] Spm module awkward refactor --- src/pygama/evt/modules/spm.py | 473 +++++++++--------- tests/evt/configs/module-test-evt-config.json | 2 +- .../module-test-t0-vov-evt-config.json | 2 +- 3 files changed, 244 insertions(+), 233 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index aa0e41a71..cd1cfb812 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -1,7 +1,7 @@ """ Module for special event level routines for SiPMs -functions must take as the first 3 args in order: +functions must take as the first 4 args in order: - path to the hit file - path to the dsp file - path to the tcm file @@ -17,38 +17,52 @@ from lgdo.lh5 import LH5Store -# get SiPM coincidence window mask -def get_spm_mask(lim, trgr, tdefault, tmin, tmax, pe, times) -> np.ndarray: - trig = trgr - if isinstance(trgr, VectorOfVectors): - trig = trig.to_aoesa().view_as("np") - elif isinstance(trgr, Array): - trig = trig.view_as("np") +# get an 1D akward array from 0 to 2D array +# casted by minimum of a 2D array +def cast_trigger( + trgr: int | float | Array | VectorOfVectors | ak.Array, + tdefault: float, + length: int = None, +) -> ak.Array: + if isinstance(trgr, Array): + return ak.fill_none(ak.nan_to_none(trgr.view_as("ak")), tdefault) + + elif isinstance(trgr, (VectorOfVectors)): + return ak.fill_none( + ak.min(ak.fill_none(trgr.view_as("ak"), tdefault), axis=-1), tdefault + ) + elif isinstance(trgr, ak.Array): if trgr.ndim == 1: - trig = ak.to_numpy(trig) + return ak.fill_none(trgr, tdefault) + elif trgr.ndim == 2: + return ak.fill_none(ak.min(ak.fill_none(trgr, tdefault), axis=-1), tdefault) else: - trig = ak.to_numpy( - ak.fill_none( - ak.pad_none(trig, target=ak.max(ak.count(trig, axis=-1)), axis=-1), - np.nan, - ), - allow_missing=False, - ) - if isinstance(trig, np.ndarray) and trig.ndim == 2: - trig = np.where(np.isnan(trig).all(axis=1)[:, None], tdefault, trig) - trig = np.nanmin(trig, axis=1) - 
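cast_trigger reduces whatever trigger input it receives to one time per event: the minimum over candidates, with the default wherever nothing survives. The jagged (VectorOfVectors) case in isolation, with an illustrative tdefault:

    import awkward as ak

    tdefault = 48000.0
    trgr = ak.Array([[50100.0, 49800.0], []])  # per-event trigger candidates

    tge = ak.fill_none(
        ak.min(ak.fill_none(trgr, tdefault), axis=-1),
        tdefault,
    )
    # tge -> [49800.0, 48000.0]: earliest candidate, default for empty events

get_spm_mask then compares times converted from samples to nanoseconds (times * 16.0) against the [tge - tmin, tge + tmax] window, together with the photo-electron threshold.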
- elif isinstance(trig, np.ndarray) and trig.ndim == 1: - trig = np.where(np.isnan(trig), tdefault, trig) + raise ValueError(f"Too many dimensions: {trgr.ndim}") + elif isinstance(trgr, (float, int)) and isinstance(length, int): + return ak.Array([trgr] * length) else: raise ValueError(f"Can't deal with t0 of type {type(trgr)}") - tmi = trig - tmin - tma = trig + tmax - mask = (times < tma[:, None] / 16) & (times > tmi[:, None] / 16) & (pe > lim) - return mask, trig +# get SiPM coincidence window mask +def get_spm_mask( + lim: float, trgr: ak.Array, tmin: float, tmax: float, pe: ak.Array, times: ak.Array +) -> ak.Array: + if trgr.ndim != 1: + raise ValueError("trigger array muse be 1 dimensional!") + if (len(trgr) != len(pe)) or (len(trgr) != len(times)): + raise ValueError( + f"All arrays must have same dimension across first axis len(pe)={len(pe)}, len(times)={len(times)}, len(trgr)={len(trgr)}" + ) + + tmi = trgr - tmin + tma = trgr + tmax + + mask = ( + ((times * 16.0) < tma[:, None]) & ((times * 16.0) > tmi[:, None]) & (pe > lim) + ) + return mask # get LAr indices according to mask per event over all channels @@ -65,57 +79,45 @@ def get_masked_tcm_idx( idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") arr_lst = [] + + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) + for ch in chs: idx_ch = idx[ids == int(ch[2:])] - energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "np" + + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") + ) ) - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos + + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") + ) ) + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + if mode == 0: - out_idx = np.repeat( - np.arange(len(mask[0]))[:, None], repeats=len(mask), axis=1 - ).T - out_idx = np.where(mask, out_idx, np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.local_index(mask)[mask] + elif mode == 1: - out_idx = np.where(mask, np.where(ids == int(ch[2:]))[0][:, None], np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.Array(np.where(ids == int(ch[2:]))[0]) + out_idx = out_idx[:, None][mask[mask] - 1] + elif mode == 2: - out_idx = np.where(mask, int(ch[2:]), np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.Array([int(ch[2:])] * len(mask)) + out_idx = out_idx[:, None][mask[mask] - 1] + elif mode == 3: - out_idx = np.where(mask, idx_ch[:, None], np.nan) - out_idx = VectorOfVectors( - flattened_data=out_idx.flatten()[~np.isnan(out_idx.flatten())], - cumulative_length=np.cumsum( - np.count_nonzero(~np.isnan(out_idx), axis=1) - ), - 
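The `out_idx[:, None][mask[mask] - 1]` pattern used in modes 1-3 above is a broadcasting trick: it repeats a per-event scalar once for every hit that survives the mask. Shown standalone with toy values:

    import awkward as ak

    scalar = ak.Array([11, 12, 13])  # e.g. one channel id per event
    mask = ak.Array([[True, False, True], [False], [True, True]])

    # mask[mask] keeps one True per selected hit; True - 1 == 0, so indexing
    # scalar[:, None] with it broadcasts the scalar into the jagged shape
    out = scalar[:, None][mask[mask] - 1]
    # out -> [[11, 11], [], [13, 13]]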
dtype=int, - ).view_as("ak", preserve_dtype=True) + out_idx = ak.Array(idx_ch) + out_idx = out_idx[:, None][mask[mask] - 1] + else: raise ValueError("Unknown mode") @@ -124,219 +126,228 @@ def get_masked_tcm_idx( return VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1)) -# get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode): + if mode not in ["energy_hc", "energy_dplms", "majority_hc", "majority_dplms"]: + raise ValueError("Unknown mode") + # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - sum = np.zeros(np.max(idx) + 1) + out = np.zeros(np.max(idx) + 1) + + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) + for ch in chs: - # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] - energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "np" - ) - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos - ) - pes = energy_in_pe - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - sum[idx_ch] = sum[idx_ch] + chsum - return Array(nda=sum) + + if mode in ["energy_dplms", "majority_dplms"]: + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") + ) + ) + + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") + ) + ) + + else: + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "ak" + ) + ) + ) + + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( + "ak" + ) + ) + ) + + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + pe = pe[mask] + + if mode in ["energy_hc", "energy_dplms"]: + out[idx_ch] = out[idx_ch] + ak.to_numpy(ak.nansum(pe, axis=-1)) + + else: + out[idx_ch] = out[idx_ch] + ak.to_numpy( + ak.where(ak.nansum(pe, axis=-1) > lim, 1, 0) + ) + + return Array(nda=out) + + +# get LAr energy per event over all channels +def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_hc" + ) # get LAr majority per event over all channels def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: - # load TCM data to define an event - store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - maj = np.zeros(np.max(idx) + 1) - for ch in chs: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - energy_in_pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "np" - ) - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos - ) - - pes = 
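get_spm_ene_or_maj accumulates one number per event across all channels. Because each channel's idx_ch lists distinct event rows, plain fancy-indexed addition is safe (no duplicate-index pitfalls). A toy version of the accumulation:

    import numpy as np

    n_events = 5
    out = np.zeros(n_events)
    per_channel = [
        (np.array([0, 2, 4]), np.array([1.0, 2.0, 0.5])),  # channel A
        (np.array([1, 2]), np.array([0.2, 0.3])),          # channel B
    ]
    for idx_ch, chsum in per_channel:
        out[idx_ch] = out[idx_ch] + chsum
    # out -> [1.0, 0.2, 2.3, 0.0, 0.5]
    # majority mode instead adds 1 per channel whose summed PE passes lim:
    # out[idx_ch] += np.where(chsum > lim, 1, 0)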
energy_in_pe - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - chmaj = np.where(chsum > lim, 1, 0) - maj[idx_ch] = maj[idx_ch] + chmaj - return Array(nda=maj) + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_hc" + ) # get LAr energy per event over all channels def get_energy_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - # load TCM data to define an event - store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - sum = np.zeros(np.max(idx) + 1) - for ch in chs: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - energy_in_pe_dplms = store.read( - f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - trigger_pos_dplms = store.read( - f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms - ) - pes = energy_in_pe_dplms - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - sum[idx_ch] = sum[idx_ch] + chsum - return Array(nda=sum) + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_dplms" + ) # get LAr majority per event over all channels def get_majority_dplms( f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax ) -> Array: - # load TCM data to define an event - store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - maj = np.zeros(np.max(idx) + 1) - for ch in chs: - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] - energy_in_pe_dplms = store.read( - f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - trigger_pos_dplms = store.read( - f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch - )[0].view_as("np") - mask, _ = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe_dplms, trigger_pos_dplms - ) - pes = energy_in_pe_dplms - pes = np.where(np.isnan(pes), 0, pes) - pes = np.where(mask, pes, 0) - chsum = np.nansum(pes, axis=1) - chmaj = np.where(chsum > lim, 1, 0) - maj[idx_ch] = maj[idx_ch] + chmaj - return Array(nda=maj) + return get_spm_ene_or_maj( + f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_dplms" + ) +# Calculate the ETC in different trailing modes: +# trail = 0: Singlet window = [tge,tge+swin] +# trail = 1: Singlet window = [t_first_lar_pulse, t_first_lar_pulse+ swin] +# trail = 2: Like trail = 1, but t_first_lar_pulse <= tge is ensured +# min_first_pls_ene sets the minimum energy of the first pulse (only used in trail > 0) def get_etc( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, swin, trail + f_hit, + f_dsp, + f_tcm, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + swin, + trail, + min_first_pls_ene, ) -> Array: # ignore stupid numpy warnings warnings.filterwarnings("ignore", r"All-NaN slice encountered") warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") warnings.filterwarnings("ignore", r"invalid value encountered in divide") - store = LH5Store() - energy_in_pe, _ = store.read(f"{chs[0]}/hit/energy_in_pe", f_hit) - - peshape = energy_in_pe.view_as("np").shape - # 1D = channel, 2D = event num, 3D = array per event - pes = np.zeros([len(chs), peshape[0], 
peshape[1]]) - times = np.zeros([len(chs), peshape[0], peshape[1]]) - # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - for i in range(len(chs)): - # get index list for this channel to be loaded - idx_ch = idx[ids == int(chs[i][2:])] - energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - trigger_pos = store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ - 0 - ].view_as("np") - mask, tge = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos - ) - pe = energy_in_pe - time = trigger_pos * 16 + pe_lst = [] + time_lst = [] - pe = np.where(mask, pe, np.nan) - time = np.where(mask, time, np.nan) + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) - pes[i][idx_ch] = pe - times[i][idx_ch] = time + for ch in chs: + idx_ch = idx[ids == int(ch[2:])] - outi = None - if trail > 0: - t1d = np.nanmin(times, axis=(0, 2)) - if trail == 2: - t1d[t1d > tge] = tge[t1d > tge] - tt = t1d[:, None] - outi = np.where( - np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)) > 0, - np.nansum( - np.where((times >= tt) & (times < tt + swin), pes, 0), axis=(0, 2) + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") ) - / np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), - np.nansum(np.where((times >= tt), pes, 0), axis=(0, 2)), ) - return Array(nda=outi) - else: - outi = np.where( - np.nansum(pes, axis=(0, 2)) > 0, - np.nansum( - np.where( - (times >= tge[:, None]) & (times <= tge[:, None] + swin), pes, 0 - ), - axis=(0, 2), + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") ) - / np.nansum(np.where((times >= tge[:, None]), pes, 0), axis=(0, 2)), - np.nansum(pes, axis=(0, 2)), ) - return Array(nda=outi) + + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + + pe = pe[mask] + pe_lst.append(pe) + + times = times[mask] * 16 + time_lst.append(times) + + pe_all = ak.concatenate(pe_lst, axis=-1) + time_all = ak.concatenate(time_lst, axis=-1) + + if trail > 0: + t1d = ak.min(time_all[pe_all > min_first_pls_ene], axis=-1) + + if trail == 2: + t1d = ak.where(t1d > tge, tge, t1d) + + mask_total = time_all > t1d + mask_singlet = (time_all > t1d) & (time_all < t1d + swin) + + else: + mask_total = time_all > tge + mask_singlet = (time_all > tge) & (time_all < tge + swin) + + pe_singlet = ak.nansum(pe_all[mask_singlet], axis=-1) + pe_total = ak.nansum(pe_all[mask_total], axis=-1) + etc = ak.where(pe_total > 0, pe_singlet / pe_total, np.nan) + + return Array(nda=ak.to_numpy(ak.fill_none(etc, np.nan), allow_missing=False)) +# returns relative time shift of the first LAr pulse relative to the Ge trigger def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: store = LH5Store() # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - spm_tmin = np.full(np.max(idx) + 1, np.inf) - for i in range(len(chs)): - # get index list for this channel to be loaded - idx_ch = idx[ids == int(chs[i][2:])] - energy_in_pe = store.read(f"{chs[i]}/hit/energy_in_pe", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") - trigger_pos = 
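The refactored get_etc reduces to a singlet-over-total ratio computed on the concatenated, masked pulses. The trail == 0 branch in isolation, with invented values (times in ns):

    import awkward as ak
    import numpy as np

    swin = 1000.0
    tge = ak.Array([48000.0, 48000.0])
    pe_all = ak.Array([[1.0, 0.5, 2.0], [0.7]])
    time_all = ak.Array([[48100.0, 48500.0, 52000.0], [60000.0]])

    mask_total = time_all > tge
    mask_singlet = (time_all > tge) & (time_all < tge + swin)

    pe_singlet = ak.nansum(pe_all[mask_singlet], axis=-1)
    pe_total = ak.nansum(pe_all[mask_total], axis=-1)
    etc = ak.where(pe_total > 0, pe_singlet / pe_total, np.nan)
    # etc -> [0.428..., 0.0]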
store.read(f"{chs[i]}/hit/trigger_pos", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") - mask, tge = get_spm_mask( - lim, trgr, tdefault, tmin, tmax, energy_in_pe, trigger_pos + time_lst = [] + + if isinstance(trgr, (float | int)): + tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) + else: + tge = cast_trigger(trgr, tdefault, length=None) + + for ch in chs: + idx_ch = idx[ids == int(ch[2:])] + + pe = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") + ) ) - time = trigger_pos * 16 - time = ak.min(ak.nan_to_none(time[mask]), axis=-1) - if not time: - return Array(nda=np.zeros(len(spm_tmin))) - time = ak.fill_none(time, tdefault) - if not time: - time = ak.to_numpy(time, allow_missing=False) - spm_tmin = np.where(time < spm_tmin, time, spm_tmin) + # times are in sample units + times = ak.drop_none( + ak.nan_to_none( + store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") + ) + ) + + mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + + # apply mask and convert sample units to ns + time_lst.append(times[mask] * 16) + + time_all = ak.concatenate(time_lst, axis=-1) + out = ak.min(time_all, axis=-1) + + # Convert to 1D numpy array + out = ak.to_numpy(ak.fill_none(out, np.inf), allow_missing=False) + tge = ak.to_numpy(tge, allow_missing=False) - return Array(spm_tmin - tge) + return Array(out - tge) diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index d0ea1bc68..6aba3bf75 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" }, "lar_energy_dplms": { "channels": "spms_on", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index d31e9717a..5d1c6f256 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" }, "lar_energy_dplms": { "channels": "spms_on", From d35976985c37bd44aae769b5f7057b2d91da1fc5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 18 Jan 2024 18:52:15 +0100 Subject: [PATCH 136/191] add possibility to add atributes to evt LGDO --- src/pygama/evt/build_evt.py | 9 +++++++++ tests/evt/configs/basic-evt-config.json | 3 ++- tests/evt/test_build_evt.py | 6 +++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index b37f5a3a5..c39ddffb4 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -1116,6 +1116,11 @@ def build_evt( if "parameters" in v.keys(): var = var | v["parameters"] res = table.eval(v["expression"].replace("evt.", ""), var) + + # add attribute if present + if "lgdo_attrs" in v.keys(): + res.attrs |= v["lgdo_attrs"] + table.add_field(k, res) # Else we build the event entry @@ -1169,6 +1174,10 @@ def build_evt( srter, ) + # add attribute if present + if "lgdo_attrs" in v.keys(): + obj.attrs |= v["lgdo_attrs"] + table.add_field(k, obj) # write output fields into f_evt diff --git 
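get_time_shift ends by converting the per-event minimum back to a flat numpy array, using inf where no LAr pulse survived the mask. The final reduction standalone (made-up times, ns):

    import awkward as ak
    import numpy as np

    time_all = ak.Array([[48120.0, 48500.0], []])  # masked pulse times per event
    tge = np.array([48000.0, 48000.0])             # Ge trigger per event

    first = ak.to_numpy(
        ak.fill_none(ak.min(time_all, axis=-1), np.inf),
        allow_missing=False,
    )
    shift = first - tge
    # shift -> [120.0, inf]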
a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json index 0c82c673f..3a8c62753 100644 --- a/tests/evt/configs/basic-evt-config.json +++ b/tests/evt/configs/basic-evt-config.json @@ -21,7 +21,8 @@ "aggregation_mode": "sum", "expression": "hit.cuspEmax_ctc_cal > a", "parameters": { "a": 25 }, - "initial": 0 + "initial": 0, + "lgdo_attrs": { "statement": "0bb decay is real" } }, "energy": { "channels": "geds_on", diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 64ad133ed..2a7269e9d 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -28,7 +28,11 @@ def test_basics(lgnd_test_data, tmptestdir): group="/evt/", tcm_group="hardware_tcm_1", ) - + assert "statement" in store.read("/evt/multiplicity", outfile)[0].getattrs().keys() + assert ( + store.read("/evt/multiplicity", outfile)[0].getattrs()["statement"] + == "0bb decay is real" + ) assert os.path.exists(outfile) assert len(lh5.ls(outfile, "/evt/")) == 11 nda = { From c3324478288117b3b440f0ce8e0dfa32c50a5d82 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 18 Jan 2024 18:58:13 +0100 Subject: [PATCH 137/191] removed explicit typing in a spm module function to be compatible with Python 3.9 --- src/pygama/evt/modules/spm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index cd1cfb812..96a1098b6 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -20,7 +20,7 @@ # get an 1D akward array from 0 to 2D array # casted by minimum of a 2D array def cast_trigger( - trgr: int | float | Array | VectorOfVectors | ak.Array, + trgr, tdefault: float, length: int = None, ) -> ak.Array: @@ -80,7 +80,7 @@ def get_masked_tcm_idx( arr_lst = [] - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) @@ -136,7 +136,7 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") out = np.zeros(np.max(idx) + 1) - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) @@ -257,7 +257,7 @@ def get_etc( pe_lst = [] time_lst = [] - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) @@ -317,7 +317,7 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") time_lst = [] - if isinstance(trgr, (float | int)): + if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1) else: tge = cast_trigger(trgr, tdefault, length=None) From 978836ef5757813c1addeb19e1938330dc71ebbd Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 13:01:23 +0100 Subject: [PATCH 138/191] routine to create dplms dictionary for Ge processing --- src/pygama/pargen/dplms_ge_dict.py | 732 +++++++++++++++++++++++ src/pygama/pargen/energy_optimisation.py | 57 +- 2 files changed, 760 insertions(+), 29 deletions(-) create mode 100644 src/pygama/pargen/dplms_ge_dict.py diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py new file mode 100644 index 
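A minimal sketch of what the new `lgdo_attrs` option does to an output field, assuming the LGDO attrs mapping behaves as a plain dict (the attribute value is copied from the test config):

    import numpy as np
    from lgdo.types import Array

    res = Array(nda=np.zeros(4))
    res.attrs |= {"statement": "0bb decay is real"}
    assert res.attrs["statement"] == "0bb decay is real"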
000000000..8651ddc2f --- /dev/null +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -0,0 +1,732 @@ +""" +This module is for creating dplms dictionary for ge processing +""" + +from __future__ import annotations + +import itertools +import json +import logging +import os +import pathlib +import pickle +import time +from collections import OrderedDict + +import lgdo +import lgdo.lh5_store as lh5 +import matplotlib.pyplot as plt +import numpy as np +from lgdo import Array +from scipy.signal import convolve, convolve2d + +from pygama.math.histogram import get_hist +from pygama.math.peak_fitting import ( + extended_gauss_step_pdf, + extended_radford_pdf, + gauss_step_pdf, + radford_pdf, +) +from pygama.pargen.cuts import find_pulser_properties, generate_cuts, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_optimisation import ( + event_selection, + fom_FWHM, + fom_FWHM_with_dt_corr_fit, + index_data, +) +from pygama.pargen.noise_optimization import calculate_spread + +log = logging.getLogger(__name__) +sto = lh5.LH5Store() + + +def dplms_ge_dict( + lh5_path: str, + fft_files: list[str], + cal_files: list[str], + dsp_config: dict, + par_dsp: dict, + par_dsp_lh5: str, + dplms_dict: dict, + decay_const: float = 0, + ene_par: str = "dplmsEmax", + display: int = 0, +) -> dict: + """ + This function calculates the dplms dictionary for HPGe detectors. + + Parameters + ---------- + lh5_path: str + Name of channel to process, should be name of lh5 group in raw files + fft_files : list[str] + raw files with fft data + cal_files : list[str] + raw files with cal data + dsp_config: dict + dsp config file + par_dsp: dict + Dictionary with db parameters for dsp processing + par_dsp_lh5: str + Path for saving dplms coefficients + dplms_dict: dict + Dictionary with various parameters + + Returns + ------- + out_dict : dict + """ + + t0 = time.time() + log.info(f"\nSelecting baselines") + raw_bls = load_data( + fft_files, + lh5_path, + "bls", + n_events=dplms_dict["n_baselines"], + raw_wf_field=dplms_dict["raw_wf_field"], + ) + + dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) + idxs = get_cut_indexes(dsp_bls, cut_dict) + bl_field = dplms_dict["bl_field"] + log.info(f"... {len(dsp_bls[bl_field].values.nda[idxs,:])} baselines after cuts") + + bls = dsp_bls[bl_field].values.nda[idxs, : dplms_dict["bsize"]] + bls_par = {} + bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()] + for par in bls_cut_pars: + bls_par[par] = dsp_bls[par].nda + t1 = time.time() + log.info( + f"total events {len(raw_bls)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" + ) + + log.info( + "\nCalculating noise matrix of length", + dplms_dict["length"], + "n. 
events", + bls.shape[0], + "size", + bls.shape[1], + ) + nmat = noise_matrix(bls, dplms_dict["length"]) + t2 = time.time() + log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s") + + log.info("\nSelecting signals") + peaks_keV = np.array(dplms_dict["peaks_keV"]) + wsize = dplms_dict["wsize"] + wf_field = dplms_dict["wf_field"] + kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + + raw_cal, idx_list = event_selection( + cal_files, + f"{lh5_path}/raw", + dsp_config, + par_dsp[lh5_path], + peaks_keV, + np.arange(0, len(peaks_keV), 1).tolist(), + kev_widths, + cut_parameters=dplms_dict["wfs_cut_pars"], + n_events=dplms_dict["n_signals"], + ) + t3 = time.time() + log.info( + f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}" + ) + + raw_cal = index_data(raw_cal, idx_list[-1]) + log.info(f"Produce dsp data for {len(raw_cal)} events") + dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) + t4 = time.time() + log.info(f"Time to run dsp production {(t4-t3):.2f} s") + + # minimal processing chain + with open(dsp_config) as r: + dsp_config = json.load(r) + dsp_config["outputs"] = [ene_par, "dt_eff"] + + # dictionary for peak fitting + peak_dict = { + "peak": peaks_keV[-1], + "kev_width": kev_widths[-1], + "parameter": ene_par, + "func": extended_gauss_step_pdf, + "gof_func": gauss_step_pdf, + } + + if display > 0: + plot_dict = {} + plot_dict["dplms"] = {} + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + + # penalized coefficients + dp_coeffs = dplms_dict["dp_coeffs"] + if lh5_path in dplms_dict["noisy_bl"]: + log.info("Setting explicit zero area condition") + za_coeff = dp_coeffs["za"] + else: + za_coeff = dplms_dict["dp_def"]["za"] + dp_coeffs.pop("za") + coeff_keys = [key for key in dp_coeffs.keys()] + lists = [dp_coeffs[key] for key in dp_coeffs.keys()] + + prod = list(itertools.product(*lists)) + grid_dict = {} + min_fom = float("inf") + min_idx = None + + for i, values in enumerate(prod): + coeff_values = dict(zip(coeff_keys, values)) + + log.info( + "\nCase", + i, + "->", + ", ".join(f"{key} = {value}" for key, value in coeff_values.items()), + ) + grid_dict[i] = coeff_values + + sel_dict = signal_selection(dsp_cal, dplms_dict, coeff_values) + wfs = dsp_cal[wf_field].nda[sel_dict["idxs"], :] + log.info(f"... 
{len(wfs)} signals after signal selection") + + ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const) + + t_tmp = time.time() + nm_coeff = coeff_values["nm"] + ft_coeff = coeff_values["ft"] + x, y, refy = filter_synthesis( + ref, + nm_coeff * nmat, + rmat, + za_coeff, + pmat, + ft_coeff * fmat, + dplms_dict["length"], + wsize, + ) + par_dsp[lh5_path]["dplms"] = {} + par_dsp[lh5_path]["dplms"]["length"] = dplms_dict["length"] + par_dsp[lh5_path]["dplms"]["coefficients"] = x.tolist() + log.info( + f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) + ) + + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + energies = dsp_opt[ene_par].nda + enc_results = calculate_spread(energies, 10, 90, 1000) + enc, enc_err = enc_results["fom"], enc_results["fom_err"] + log.info( + f"ENC: mean = {energies.mean():.2f} ADC, FOM = {enc:.2f} ± {enc_err:.2f} ADC, evaluated in {time.time()-t_tmp:.1f} s" + ) + grid_dict[i]["enc"] = enc + grid_dict[i]["enc_err"] = enc_err + + if display > 0: + hist, bins, var = get_hist(energies, range=(-20, 20), dx=0.1) + bc = (bins[:-1] + bins[1:]) / 2.0 + ax.plot( + bc, + hist, + ds="steps", + label=f"{ene_par} - ENC = {enc:.3f} ± {enc_err:.3f} ADC", + ) + ax.set_xlabel("energy (ADC)") + ax.set_ylabel("counts") + ax.legend(loc="upper right") + + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) + + try: + res = fom_FWHM_with_dt_corr_fit( + dsp_opt, + peak_dict, + "QDrift", + idxs=np.where(~np.isnan(dsp_opt["dt_eff"].nda))[0], + ) + except: + log.debug("FWHM not calculated") + continue + + fwhm, fwhm_err, alpha, chisquare = ( + res["fwhm"], + res["fwhm_err"], + res["alpha"], + res["chisquare"], + ) + log.info( + f"FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, evaluated in {time.time()-t_tmp:.1f} s" + ) + + grid_dict[i]["fwhm"] = fwhm + grid_dict[i]["fwhm_err"] = fwhm_err + grid_dict[i]["alpha"] = alpha + + if ( + fwhm < dplms_dict["fwhm_limit"] + and fwhm_err < dplms_dict["err_limit"] + and chisquare < dplms_dict["chi_limit"] + ): + if fwhm < min_fom: + min_idx, min_fom = i, fwhm + + if min_idx is not None: + min_result = grid_dict[min_idx] + best_case_values = {key: min_result[key] for key in min_result.keys()} + + enc = best_case_values.get("enc", None) + enc_err = best_case_values.get("enc_err", 0) + fwhm = best_case_values.get("fwhm", None) + fwhm_err = best_case_values.get("fwhm_err", 0) + alpha = best_case_values.get("alpha", 0) + nm_coeff = best_case_values.get("nm", dplms_dict["dp_def"]["nm"]) + ft_coeff = best_case_values.get("ft", dplms_dict["dp_def"]["nm"]) + rt_coeff = best_case_values.get("rt", dplms_dict["dp_def"]["rt"]) + pt_coeff = best_case_values.get("pt", dplms_dict["dp_def"]["pt"]) + + if all( + v is not None + for v in [ + enc, + enc_err, + fwhm, + fwhm_err, + alpha, + nm_coeff, + ft_coeff, + rt_coeff, + pt_coeff, + ] + ): + log.info( + f"\nBest case: FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, ctc {alpha}" + ) + else: + log.error("Some values are missing in the best case results") + else: + log.error("Filter synthesis failed") + nm_coeff = dplms_dict["dp_def"]["nm"] + ft_coeff = dplms_dict["dp_def"]["ft"] + rt_coeff = dplms_dict["dp_def"]["rt"] + pt_coeff = dplms_dict["dp_def"]["pt"] + + # filter synthesis + sel_dict = signal_selection(dsp_cal, dplms_dict, best_case_values) + idxs = sel_dict["idxs"] + wfs = dsp_cal[wf_field].nda[idxs, :] + ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const) + + x, y, refy = 
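Each grid point is accepted only if it passes the FWHM, error, and chi-square limits, and among the accepted points the smallest FWHM wins. The selection logic reduced to a toy example (limits and results invented):

    results = {
        0: {"fwhm": 2.8, "fwhm_err": 0.10, "chisquare": 1.1},
        1: {"fwhm": 2.6, "fwhm_err": 0.08, "chisquare": 0.9},
        2: {"fwhm": 2.5, "fwhm_err": 0.50, "chisquare": 5.0},  # bad fit, rejected
    }
    fwhm_limit, err_limit, chi_limit = 5.0, 0.3, 2.0

    min_idx, min_fom = None, float("inf")
    for i, r in results.items():
        if (r["fwhm"] < fwhm_limit and r["fwhm_err"] < err_limit
                and r["chisquare"] < chi_limit and r["fwhm"] < min_fom):
            min_idx, min_fom = i, r["fwhm"]
    # min_idx -> 1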
filter_synthesis( + ref, + nm_coeff * nmat, + rmat, + za_coeff, + pmat, + ft_coeff * fmat, + dplms_dict["length"], + wsize, + ) + + sto.write_object( + Array(x), + name="dplms", + lh5_file=par_dsp_lh5, + wo_mode="overwrite", + group=lh5_path, + ) + + out_dict = { + "dplms": { + "length": dplms_dict["length"], + "coefficients": f"loadlh5('{par_dsp_lh5}', '{lh5_path}/dplms')", + "dp_coeffs": { + "nm": nm_coeff, + "za": za_coeff, + "ft": ft_coeff, + "rt": rt_coeff, + "pt": pt_coeff, + }, + } + } + out_alpha_dict = { + f"{ene_par}_ctc": { + "expression": f"{ene_par}*(1+dt_eff*a)", + "parameters": {"a": round(alpha, 9)}, + } + } + out_dict.update({"ctc_params": out_alpha_dict}) + + log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}") + + if display > 0: + plot_dict["dplms"]["enc_hist"] = fig + plot_dict["dplms"]["enc"] = enc + plot_dict["dplms"]["enc_err"] = enc_err + plot_dict["dplms"]["ref"] = ref + plot_dict["dplms"]["coefficients"] = x + + bl_idxs = np.random.choice(len(bls), dplms_dict["n_plot"]) + bls = bls[bl_idxs] + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for ii, wf in enumerate(bls): + if ii < 10: + ax.plot(wf, label=f"mean = {wf.mean():.1f}") + else: + ax.plot(wf) + ax.legend(title=f"{lh5_path}", loc="upper right") + plot_dict["dplms"]["bls"] = fig + fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white") + for ii, par in enumerate(bls_cut_pars): + mean = cut_dict[par]["Mean Value"] + llo, lup = cut_dict[par]["Lower Boundary"], cut_dict[par]["Upper Boundary"] + plo, pup = mean - 2 * (mean - llo), mean + 2 * (lup - mean) + hh, bb = np.histogram(bls_par[par], bins=np.linspace(plo, pup, 200)) + ax.flat[ii].plot(bb[1:], hh, ds="steps", label=f"cut on {par}") + ax.flat[ii].axvline(lup, color="k", linestyle=":", label="selection") + ax.flat[ii].axvline(llo, color="k", linestyle=":") + ax.flat[ii].set_xlabel(par) + ax.flat[ii].set_yscale("log") + ax.flat[ii].legend(title=f"{lh5_path}", loc="upper right") + plot_dict["dplms"]["bl_sel"] = fig + + wf_idxs = np.random.choice(len(wfs), dplms_dict["n_plot"]) + wfs = wfs[wf_idxs] + peak_pos = dsp_cal["peak_pos"].nda + peak_pos_neg = dsp_cal["peak_pos_neg"].nda + centroid = dsp_cal["centroid"].nda + risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda + rt_low = dplms_dict["rt_low"] + rt_high = dplms_dict["rt_high"] + peak_lim = dplms_dict["peak_lim"] + cal_par = {} + wfs_cut_pars = [par for par in dplms_dict["wfs_cut_pars"].keys()] + for par in wfs_cut_pars: + cal_par[par] = dsp_cal[par].nda + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + for ii, wf in enumerate(wfs): + if ii < 10: + ax.plot(wf, label=f"centr = {centroid[ii]}") + else: + ax.plot(wf) + ax.legend(title=f"{lh5_path}", loc="upper right") + axin = ax.inset_axes([0.1, 0.15, 0.35, 0.5]) + for wf in wfs: + axin.plot(wf) + axin.set_xlim(wsize / 2 - dplms_dict["zoom"], wsize / 2 + dplms_dict["zoom"]) + axin.set_yticklabels("") + plot_dict["dplms"]["wfs"] = fig + fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white") + wfs_cut_pars.append("centroid") + wfs_cut_pars.append("peak_pos") + wfs_cut_pars.append("risetime") + for ii, par in enumerate(wfs_cut_pars): + pspace = np.linspace( + wsize / 2 - peak_lim, wsize / 2 + peak_lim, 2 * peak_lim + ) + if par == "centroid": + llo, lup = sel_dict["ct_ll"], sel_dict["ct_hh"] + hh, bb = np.histogram(centroid, bins=pspace) + elif par == "peak_pos": + llo, lup = sel_dict["pp_ll"], sel_dict["pp_hh"] + hh, bb = np.histogram(peak_pos, bins=pspace) + 
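The generated `ctc_params` entry encodes a linear charge-trapping correction; numerically it is just the following (the alpha value here is illustrative, not a fitted one):

    import numpy as np

    a = 1.2e-6                              # alpha from the FWHM optimisation
    energy = np.array([2612.0, 2613.1])     # uncorrected energies
    dt_eff = np.array([800.0, 200.0])       # effective drift times
    corrected = energy * (1 + dt_eff * a)   # the dplmsEmax_ctc expression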
elif par == "risetime": + llo, lup = sel_dict["rt_ll"], sel_dict["rt_hh"] + rt_bins = int((rt_high - rt_low) / dplms_dict["period"]) + rt_space = np.linspace(rt_low, rt_high, rt_bins) + hh, bb = np.histogram(risetime, bins=rt_space) + else: + llo, lup = np.min(cal_par[par]), np.max(cal_par[par]) + hh, bb = np.histogram(cal_par[par], bins=np.linspace(llo, lup, 200)) + ax.flat[ii + 1].plot(bb[1:], hh, ds="steps", label=f"cut on {par}") + ax.flat[ii + 1].axvline( + llo, color="k", linestyle=":", label=f"sel. {llo:.1f} {lup:.1f}" + ) + if par != "centroid": + ax.flat[ii + 1].axvline(lup, color="k", linestyle=":") + ax.flat[ii + 1].set_xlabel(par) + ax.flat[ii + 1].set_yscale("log") + ax.flat[ii + 1].legend(title=f"{lh5_path}", loc="upper right") + roughenergy = dsp_cal["trapTmax"].nda + roughenergy_sel = roughenergy[idxs] + ell, ehh = roughenergy.min(), roughenergy.max() + he, be = np.histogram(roughenergy, bins=np.linspace(ell, ehh, 1000)) + hs, be = np.histogram(roughenergy_sel, bins=np.linspace(ell, ehh, 1000)) + ax.flat[0].plot(be[1:], he, c="b", ds="steps", label="initial") + ax.flat[0].plot(be[1:], hs, c="r", ds="steps", label="selected") + ax.flat[0].set_xlabel("rough energy (ADC)") + ax.flat[0].set_yscale("log") + ax.flat[0].legend(loc="upper right", title=f"{lh5_path}") + plot_dict["dplms"]["wf_sel"] = fig + + fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") + ax.plot(np.flip(x), "r-", label=f"filter") + ax.axhline(0, color="black", linestyle=":") + ax.legend(loc="upper right", title=f"{lh5_path}") + axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33]) + axin.plot(np.flip(x), "r-") + axin.set_xlim( + dplms_dict["length"] / 2 - dplms_dict["zoom"], + dplms_dict["length"] / 2 + dplms_dict["zoom"], + ) + axin.set_yticklabels("") + ax.indicate_inset_zoom(axin) + + return out_dict, plot_dict + else: + return out_dict + + +def load_data( + raw_file: list[str], + lh5_path: str, + sel_type: str, + peaks: np.array = [], + n_events: int = 5000, + e_lower_lim: float = 1200, + e_upper_lim: float = 2700, + raw_wf_field: str = "waveform", +) -> lgdo.Table: + sto = lh5.LH5Store() + df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") + + if sel_type == "bls": + cuts = np.where(df.daqenergy.values == 0)[0] + idx_list = [] + waveforms = sto.read_object( + f"{lh5_path}/raw/{raw_wf_field}", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) + return tb_data + else: + pulser_props = find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + pulser_e, pulser_err = entry[0], entry[1] + if pulser_err < 10: + pulser_err = 10 + e_cut = (df.daqenergy.values < pulser_e + pulser_err) & ( + df.daqenergy.values > pulser_e - pulser_err + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) + if sel_type == "pul": + cuts = np.where(ids == True)[0] + log.debug(f"{len(cuts)} events found for pulser") + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table( + col_dict={"waveform": waveforms, 
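The pulser veto above ORs one energy window per detected pulser line into a single mask, flooring the window half-width at 10. Standalone, with invented energies:

    import numpy as np

    daqenergy = np.array([0, 1500, 3000, 3005, 2990])
    pulser_props = [(3000.0, 5.0)]  # (energy, width) pairs

    final_mask = np.zeros(len(daqenergy), dtype=bool)
    for pulser_e, pulser_err in pulser_props:
        pulser_err = max(pulser_err, 10)
        final_mask |= (daqenergy < pulser_e + pulser_err) & (
            daqenergy > pulser_e - pulser_err
        )
    # final_mask -> [False, False, True, True, False]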
"daqenergy": daqenergy} + ) + return tb_data + else: + # Get events around peak using raw file values + initial_mask = (df.daqenergy.values > 0) & (~ids) + rough_energy = df.daqenergy.values[initial_mask] + initial_idxs = np.where(initial_mask)[0] + + guess_keV = 2620 / np.nanpercentile(rough_energy, 99) + Euc_min = 0 # threshold / guess_keV * 0.6 + Euc_max = 2620 / guess_keV * 1.1 + dEuc = 1 # / guess_keV + hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( + hist, bins, var, peaks + ) + log.debug( + f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}" + ) + e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] + e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] + log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") + mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) + cuts = initial_idxs[mask][:] + log.debug(f"{len(cuts)} events found in energy range") + rough_energy = rough_energy[mask] + rough_energy = rough_energy[:n_events] + rough_energy = rough_energy * roughpars[0] + roughpars[1] + waveforms = sto.read_object( + f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts + )[0] + daqenergy = sto.read_object( + f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts + )[0] + tb_data = lh5.Table( + col_dict={"waveform": waveforms, "daqenergy": daqenergy} + ) + return tb_data, rough_energy + + +def is_valid_centroid( + centroid: np.array, lim: int, size: int, full_size: int +) -> list[bool]: + llim = size / 2 - lim + hlim = full_size - size / 2 + idxs = (centroid > llim) & (centroid < hlim) + return idxs, llim, hlim + + +def is_not_pile_up( + peak_pos: np.array, peak_pos_neg: np.array, thr: int, lim: int, size: int +) -> list[bool]: + bin_edges = np.linspace(size / 2 - lim, size / 2 + lim, 2 * lim) + hist, bin_edges = np.histogram(peak_pos, bins=bin_edges) + + thr = thr * hist.max() / 100 + low_thr_idxs = np.where(hist[: hist.argmax()] < thr)[0] + upp_thr_idxs = np.where(hist[hist.argmax() :] < thr)[0] + + idx_low = low_thr_idxs[-1] if low_thr_idxs.size > 0 else 0 + idx_upp = ( + upp_thr_idxs[0] + hist.argmax() if upp_thr_idxs.size > 0 else len(hist) - 1 + ) + + llow, lupp = bin_edges[idx_low], bin_edges[idx_upp] + + idxs = [] + for n, nn in zip(peak_pos, peak_pos_neg): + condition1 = np.count_nonzero(n > 0) == 1 + condition2 = ( + np.count_nonzero((n > 0) & ((n < llow) | (n > lupp) & (n < size))) == 0 + ) + condition3 = np.count_nonzero(nn > 0) == 0 + idxs.append(condition1 and condition2 and condition3) + return idxs, llow, lupp + + +def is_valid_risetime(risetime: np.array, llim: int, perc: float): + hlim = np.percentile(risetime[~np.isnan(risetime)], perc) + idxs = (risetime >= llim) & (risetime <= hlim) + return idxs, llim, hlim + + +def signal_selection(dsp_cal, dplms_dict, coeff_values): + peak_pos = dsp_cal["peak_pos"].nda + peak_pos_neg = dsp_cal["peak_pos_neg"].nda + centroid = dsp_cal["centroid"].nda + risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda + + rt_low = dplms_dict["rt_low"] + rt_high = dplms_dict["rt_high"] + peak_lim = dplms_dict["peak_lim"] + wsize = dplms_dict["wsize"] + bsize = dplms_dict["bsize"] + + centroid_lim = dplms_dict["centroid_lim"] + if "rt" in coeff_values: + perc = coeff_values["rt"] + else: + perc = dplms_dict["dp_def"]["rt"] + if "pt" in coeff_values: + thr = coeff_values["pt"] + else: + thr = dplms_dict["dp_def"]["rt"] + + idxs_ct, ct_ll, ct_hh = 
is_valid_centroid(centroid, centroid_lim, wsize, bsize) + log.info(f"... {len(peak_pos[idxs_ct,:])} signals after alignment") + + idxs_pp, pp_ll, pp_hh = is_not_pile_up(peak_pos, peak_pos_neg, thr, peak_lim, wsize) + log.info(f"... {len(peak_pos[idxs_pp,:])} signals after pile-up cut") + + idxs_rt, rt_ll, rt_hh = is_valid_risetime(risetime, rt_low, perc) + log.info(f"... {len(peak_pos[idxs_rt,:])} signals after risetime cut") + + idxs = idxs_ct & idxs_pp & idxs_rt + sel_dict = { + "idxs": idxs, + "ct_ll": ct_ll, + "ct_hh": ct_hh, + "pp_ll": pp_ll, + "pp_hh": pp_hh, + "rt_ll": rt_ll, + "rt_hh": rt_hh, + } + return sel_dict + + +def noise_matrix(bls: np.array, length: int) -> np.array: + nev, size = bls.shape + ref = np.mean(bls, axis=0) + offset = np.mean(ref) + bls = bls - offset + nmat = np.matmul(bls.T, bls, dtype=float) / nev + kernel = np.identity(size - length + 1) + nmat = convolve2d(nmat, kernel, boundary="symm", mode="valid") / (size - length + 1) + return nmat + + +def signal_matrices( + wfs: np.array, length: int, decay_const: float, ff: int = 2 +) -> np.array: + nev, size = wfs.shape + lo = size // 2 - 100 + flo = size // 2 - length // 2 + fhi = size // 2 + length // 2 + offsets = np.mean(wfs[:, :lo], axis=1) + wfs = wfs - offsets[:, np.newaxis] + + # Reference signal + ref = np.sum(wfs, axis=0) + ref /= np.max(ref) + rmat = np.outer(ref[flo:fhi], ref[flo:fhi]) + + # Pile-up matrix + if decay_const > 0: + decay = np.exp(-np.arange(length) / decay_const) + else: + decay = np.zeros(length) + pmat = np.outer(decay, decay) + + # Flat top matrix + flo -= ff // 2 + fhi += ff // 2 + wfs = wfs[:, flo:fhi] + fmat = np.matmul(wfs.T, wfs, dtype=float) / nev + m1 = ((1, -1), (-1, 1)) + fmat = convolve2d(fmat, m1, boundary="symm", mode="valid") + if ff > 0: + fmat = convolve2d(fmat, np.identity(ff), boundary="symm", mode="valid") / ff + return ref, rmat, pmat, fmat + + +def filter_synthesis( + ref: np.array, + nmat: np.array, + rmat: np.array, + za: int, + pmat: np.array, + fmat: np.array, + length: int, + size: int, +) -> np.array: + mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat + flo = (size // 2) - (length // 2) + fhi = (size // 2) + (length // 2) + x = np.linalg.solve(mat, ref[flo:fhi]) + y = convolve(ref, np.flip(x), mode="valid") + maxy = np.max(y) + x /= maxy + y /= maxy + refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)] + return x, y, refy diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 1c34901d9..5da39c84f 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -857,6 +857,7 @@ def fom_FWHM_fit(tb_in, kwarg_dict): csqr, n_sig, n_sig_err, + _, ) = get_peak_fwhm_with_dt_corr( Energies, alpha, dt, func, gof_func, peak=peak, kev_width=kev_width, kev=True ) @@ -938,6 +939,7 @@ def event_selection( else: final_mask = final_mask | e_cut ids = final_mask + print(f"pulser found: {pulser_props}") log.debug(f"pulser found: {pulser_props}") else: log.debug("no_pulser") @@ -950,18 +952,14 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = threshold / guess_keV + Euc_min = 0 # threshold / guess_keV Euc_max = 2620 / guess_keV * 1.1 - dEuc = 5 / guess_keV + dEuc = 1 / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( - hist, - bins, - var, - np.array( - [238.632, 
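filter_synthesis solves one symmetric linear system built from the noise, reference, zero-area, pile-up and flat-top matrices, then normalises the filter so the filtered reference peaks at unit gain. A toy version of just the solve-and-normalise step (the matrix here is a random positive-definite stand-in, not real detector data):

    import numpy as np
    from scipy.signal import convolve

    length, size = 16, 64
    ref = np.exp(-((np.arange(size) - size // 2) ** 2) / 50.0)  # toy reference

    rng = np.random.default_rng(0)
    a = 0.1 * rng.standard_normal((length, length))
    mat = np.eye(length) + a @ a.T   # stand-in for nmat + rmat + za + pmat + fmat

    flo, fhi = size // 2 - length // 2, size // 2 + length // 2
    x = np.linalg.solve(mat, ref[flo:fhi])
    y = convolve(ref, np.flip(x), mode="valid")
    x /= np.max(y)                   # unit gain at the reference peak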
583.191, 727.330, 860.564, 1592.5, 1620.5, 2103.53, 2614.553] - ), + hist, bins, var, peaks_keV, n_sigma=3 ) + print(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") masks = [] @@ -1075,11 +1073,11 @@ def event_selection( return out_events, idx_list -def fwhm_slope(x, m0, m1, m2): +def fwhm_slope(x, m0, m1): """ Fit the energy resolution curve """ - return np.sqrt(m0 + m1 * x + m2 * (x**2)) + return np.sqrt(m0 + m1 * x) def interpolate_energy(peak_energies, points, err_points, energy): @@ -1087,7 +1085,7 @@ def interpolate_energy(peak_energies, points, err_points, energy): if len(points[~nan_mask]) < 3: return np.nan, np.nan, np.nan else: - param_guess = [2, 0.001, 0.000001] # + param_guess = [2, 0.001] # param_bounds = (0, [10., 1. ])# try: fit_pars, fit_covs = curve_fit( @@ -1137,6 +1135,11 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_0_est"].nda, dtype="float64") elif ctc_parameter == "rt": dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_01"].nda, dtype="float64") + + if idxs is not None: + Energies = Energies[idxs] + dt = dt[idxs] + if np.isnan(Energies).any() or np.isnan(dt).any(): if np.isnan(Energies).any(): log.debug(f"nan energy values for peak {peak}") @@ -1151,10 +1154,6 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): "n_sig_err": np.nan, } - if idxs is not None: - Energies = Energies[idxs] - dt = dt[idxs] - # Return fwhm of optimal alpha in kev with error try: ( @@ -1207,39 +1206,37 @@ def single_peak_fom(data, kwarg_dict): return out_dict -def new_fom(data, kwarg_dict): +def new_fom(data, kwarg_dict, alpha=None): peaks = kwarg_dict["peaks_keV"] idx_list = kwarg_dict["idx_list"] ctc_param = kwarg_dict["ctc_param"] peak_dicts = kwarg_dict["peak_dicts"] - out_dict = fom_FWHM_with_dt_corr_fit( - data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 - ) - alpha = out_dict["alpha"] + if alpha is None: + out_dict = fom_FWHM_with_dt_corr_fit( + data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 + ) + alpha = out_dict["alpha"] + log.info(alpha) fwhms = [] fwhm_errs = [] n_sig = [] n_sig_err = [] - for i, peak in enumerate(peaks[:-1]): + chisquares = [] + for i, peak in enumerate(peaks): out_peak_dict = fom_FWHM( data, peak_dicts[i], ctc_param, alpha, idxs=idx_list[i], display=0 ) - # n_sig_minimum = peak_dicts[i]["n_sig_minimum"] - # if peak_dict["n_sig"] Date: Tue, 28 Nov 2023 15:41:39 +0100 Subject: [PATCH 139/191] removed dependency on nopt routine --- src/pygama/pargen/dplms_ge_dict.py | 32 ------------------------------ 1 file changed, 32 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 8651ddc2f..71311f4ce 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -36,7 +36,6 @@ fom_FWHM_with_dt_corr_fit, index_data, ) -from pygama.pargen.noise_optimization import calculate_spread log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -217,30 +216,6 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) - t_tmp = time.time() - dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - energies = dsp_opt[ene_par].nda - enc_results = calculate_spread(energies, 10, 90, 1000) - enc, enc_err = enc_results["fom"], enc_results["fom_err"] - log.info( - f"ENC: mean = {energies.mean():.2f} ADC, FOM = {enc:.2f} ± 
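With the quadratic term dropped, the resolution curve is now fit with two parameters, sqrt(m0 + m1 * x). A minimal interpolation to an arbitrary energy (the data points are invented):

    import numpy as np
    from scipy.optimize import curve_fit

    def fwhm_slope(x, m0, m1):
        return np.sqrt(m0 + m1 * x)

    peaks = np.array([583.2, 1592.5, 2614.5])   # keV
    fwhms = np.array([1.6, 2.2, 2.8])           # keV, illustrative

    pars, cov = curve_fit(fwhm_slope, peaks, fwhms, p0=[2, 0.001])
    fwhm_2039 = fwhm_slope(2039.0, *pars)       # e.g. interpolate to Qbb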
{enc_err:.2f} ADC, evaluated in {time.time()-t_tmp:.1f} s" - ) - grid_dict[i]["enc"] = enc - grid_dict[i]["enc_err"] = enc_err - - if display > 0: - hist, bins, var = get_hist(energies, range=(-20, 20), dx=0.1) - bc = (bins[:-1] + bins[1:]) / 2.0 - ax.plot( - bc, - hist, - ds="steps", - label=f"{ene_par} - ENC = {enc:.3f} ± {enc_err:.3f} ADC", - ) - ax.set_xlabel("energy (ADC)") - ax.set_ylabel("counts") - ax.legend(loc="upper right") - t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -281,8 +256,6 @@ def dplms_ge_dict( min_result = grid_dict[min_idx] best_case_values = {key: min_result[key] for key in min_result.keys()} - enc = best_case_values.get("enc", None) - enc_err = best_case_values.get("enc_err", 0) fwhm = best_case_values.get("fwhm", None) fwhm_err = best_case_values.get("fwhm_err", 0) alpha = best_case_values.get("alpha", 0) @@ -294,8 +267,6 @@ def dplms_ge_dict( if all( v is not None for v in [ - enc, - enc_err, fwhm, fwhm_err, alpha, @@ -366,9 +337,6 @@ def dplms_ge_dict( log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}") if display > 0: - plot_dict["dplms"]["enc_hist"] = fig - plot_dict["dplms"]["enc"] = enc - plot_dict["dplms"]["enc_err"] = enc_err plot_dict["dplms"]["ref"] = ref plot_dict["dplms"]["coefficients"] = x From e800f393a87b478fbeeda37053c467091b0d216b Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 16:19:52 +0100 Subject: [PATCH 140/191] loading full raw table --- src/pygama/pargen/dplms_ge_dict.py | 98 +++++++++++------------------- 1 file changed, 35 insertions(+), 63 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 71311f4ce..3ca56cb66 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -80,13 +80,7 @@ def dplms_ge_dict( t0 = time.time() log.info(f"\nSelecting baselines") - raw_bls = load_data( - fft_files, - lh5_path, - "bls", - n_events=dplms_dict["n_baselines"], - raw_wf_field=dplms_dict["raw_wf_field"], - ) + raw_bls = load_data(fft_files, lh5_path, "bls", n_events=dplms_dict["n_baselines"]) dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) @@ -216,6 +210,10 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) + t_tmp = time.time() + dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) + energies = dsp_opt[ene_par].nda + t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -459,7 +457,6 @@ def load_data( n_events: int = 5000, e_lower_lim: float = 1200, e_upper_lim: float = 2700, - raw_wf_field: str = "waveform", ) -> lgdo.Table: sto = lh5.LH5Store() df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") @@ -467,13 +464,9 @@ def load_data( if sel_type == "bls": cuts = np.where(df.daqenergy.values == 0)[0] idx_list = [] - waveforms = sto.read_object( - f"{lh5_path}/raw/{raw_wf_field}", raw_file, n_rows=n_events, idx=cuts + tb_data = sto.read_object( + f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts )[0] - daqenergy = sto.read_object( - f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts - )[0] - tb_data = lh5.Table(col_dict={"waveform": waveforms, "daqenergy": daqenergy}) return tb_data else: pulser_props = find_pulser_properties(df, energy="daqenergy") @@ -495,55 +488,34 @@ def load_data( else: log.debug("no pulser") ids = 
np.zeros(len(df.daqenergy.values), dtype=bool)
-        if sel_type == "pul":
-            cuts = np.where(ids == True)[0]
-            log.debug(f"{len(cuts)} events found for pulser")
-            waveforms = sto.read_object(
-                f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts
-            )[0]
-            daqenergy = sto.read_object(
-                f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts
-            )[0]
-            tb_data = lh5.Table(
-                col_dict={"waveform": waveforms, "daqenergy": daqenergy}
-            )
-            return tb_data
-        else:
-            # Get events around peak using raw file values
-            initial_mask = (df.daqenergy.values > 0) & (~ids)
-            rough_energy = df.daqenergy.values[initial_mask]
-            initial_idxs = np.where(initial_mask)[0]
-
-            guess_keV = 2620 / np.nanpercentile(rough_energy, 99)
-            Euc_min = 0  # threshold / guess_keV * 0.6
-            Euc_max = 2620 / guess_keV * 1.1
-            dEuc = 1  # / guess_keV
-            hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc)
-            detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks(
-                hist, bins, var, peaks
-            )
-            log.debug(
-                f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}"
-            )
-            e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0]
-            e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0]
-            log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}")
-            mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim)
-            cuts = initial_idxs[mask][:]
-            log.debug(f"{len(cuts)} events found in energy range")
-            rough_energy = rough_energy[mask]
-            rough_energy = rough_energy[:n_events]
-            rough_energy = rough_energy * roughpars[0] + roughpars[1]
-            waveforms = sto.read_object(
-                f"{lh5_path}/raw/waveform", raw_file, n_rows=n_events, idx=cuts
-            )[0]
-            daqenergy = sto.read_object(
-                f"{lh5_path}/raw/daqenergy", raw_file, n_rows=n_events, idx=cuts
-            )[0]
-            tb_data = lh5.Table(
-                col_dict={"waveform": waveforms, "daqenergy": daqenergy}
-            )
-            return tb_data, rough_energy
+
+        # Get events around peak using raw file values
+        initial_mask = (df.daqenergy.values > 0) & (~ids)
+        rough_energy = df.daqenergy.values[initial_mask]
+        initial_idxs = np.where(initial_mask)[0]
+
+        guess_keV = 2620 / np.nanpercentile(rough_energy, 99)
+        Euc_min = 0  # threshold / guess_keV * 0.6
+        Euc_max = 2620 / guess_keV * 1.1
+        dEuc = 1  # / guess_keV
+        hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc)
+        detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks(
+            hist, bins, var, peaks
+        )
+        log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}")
+        e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0]
+        e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0]
+        log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}")
+        mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim)
+        cuts = initial_idxs[mask][:]
+        log.debug(f"{len(cuts)} events found in energy range")
+        rough_energy = rough_energy[mask]
+        rough_energy = rough_energy[:n_events]
+        rough_energy = rough_energy * roughpars[0] + roughpars[1]
+        tb_data = sto.read_object(
+            f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts
+        )[0]
+        return tb_data, rough_energy


 def is_valid_centroid(

From ffa4617e71f2a97ec09c5219296955922a726db4 Mon Sep 17 00:00:00 2001
From: valerioda
Date: Tue, 28 Nov 2023 16:45:53 +0100
Subject: [PATCH 141/191] small change on loading data

---
 src/pygama/pargen/dplms_ge_dict.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py
index 
3ca56cb66..75ee98e04 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -459,10 +459,11 @@ def load_data( e_upper_lim: float = 2700, ) -> lgdo.Table: sto = lh5.LH5Store() - df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], f"{lh5_path}/raw") + + daqenergy = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_file)[0].nda if sel_type == "bls": - cuts = np.where(df.daqenergy.values == 0)[0] + cuts = np.where(daqenergy == 0)[0] idx_list = [] tb_data = sto.read_object( f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts @@ -476,8 +477,8 @@ def load_data( pulser_e, pulser_err = entry[0], entry[1] if pulser_err < 10: pulser_err = 10 - e_cut = (df.daqenergy.values < pulser_e + pulser_err) & ( - df.daqenergy.values > pulser_e - pulser_err + e_cut = (daqenergy < pulser_e + pulser_err) & ( + daqenergy > pulser_e - pulser_err ) if final_mask is None: final_mask = e_cut @@ -487,11 +488,11 @@ def load_data( log.debug(f"pulser found: {pulser_props}") else: log.debug("no pulser") - ids = np.zeros(len(df.daqenergy.values), dtype=bool) + ids = np.zeros(len(daqenergy), dtype=bool) # Get events around peak using raw file values - initial_mask = (df.daqenergy.values > 0) & (~ids) - rough_energy = df.daqenergy.values[initial_mask] + initial_mask = (daqenergy > 0) & (~ids) + rough_energy = daqenergy[initial_mask] initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) From e4f7d6822e6e9f611a668bd6c3c0f0bfd62f25f3 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:48:11 +0100 Subject: [PATCH 142/191] moved load data out of pargen routine --- src/pygama/pargen/dplms_ge_dict.py | 130 +++-------------------- src/pygama/pargen/energy_optimisation.py | 13 +-- 2 files changed, 23 insertions(+), 120 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 75ee98e04..67caf4ced 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -34,7 +34,6 @@ event_selection, fom_FWHM, fom_FWHM_with_dt_corr_fit, - index_data, ) log = logging.getLogger(__name__) @@ -43,8 +42,8 @@ def dplms_ge_dict( lh5_path: str, - fft_files: list[str], - cal_files: list[str], + raw_fft: lgdo.Table, + raw_cal: lgdo.Table, dsp_config: dict, par_dsp: dict, par_dsp_lh5: str, @@ -60,10 +59,10 @@ def dplms_ge_dict( ---------- lh5_path: str Name of channel to process, should be name of lh5 group in raw files - fft_files : list[str] - raw files with fft data - cal_files : list[str] - raw files with cal data + fft_files : lgdo.Table + table with fft data + raw_cal : lgdo.Table + table with cal data dsp_config: dict dsp config file par_dsp: dict @@ -80,22 +79,20 @@ def dplms_ge_dict( t0 = time.time() log.info(f"\nSelecting baselines") - raw_bls = load_data(fft_files, lh5_path, "bls", n_events=dplms_dict["n_baselines"]) - - dsp_bls = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - cut_dict = generate_cuts(dsp_bls, parameters=dplms_dict["bls_cut_pars"]) - idxs = get_cut_indexes(dsp_bls, cut_dict) + dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path]) + cut_dict = generate_cuts(dsp_fft, parameters=dplms_dict["bls_cut_pars"]) + idxs = get_cut_indexes(dsp_fft, cut_dict) bl_field = dplms_dict["bl_field"] - log.info(f"... {len(dsp_bls[bl_field].values.nda[idxs,:])} baselines after cuts") + log.info(f"... 
{len(dsp_fft[bl_field].values.nda[idxs,:])} baselines after cuts") - bls = dsp_bls[bl_field].values.nda[idxs, : dplms_dict["bsize"]] + bls = dsp_fft[bl_field].values.nda[idxs, : dplms_dict["bsize"]] bls_par = {} bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()] for par in bls_cut_pars: - bls_par[par] = dsp_bls[par].nda + bls_par[par] = dsp_fft[par].nda t1 = time.time() log.info( - f"total events {len(raw_bls)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" + f"total events {len(raw_fft)}, {len(bls)} baseline selected in {(t1-t0):.2f} s" ) log.info( @@ -111,36 +108,16 @@ def dplms_ge_dict( log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s") log.info("\nSelecting signals") - peaks_keV = np.array(dplms_dict["peaks_keV"]) wsize = dplms_dict["wsize"] wf_field = dplms_dict["wf_field"] + peaks_keV = np.array(dplms_dict["peaks_keV"]) kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - raw_cal, idx_list = event_selection( - cal_files, - f"{lh5_path}/raw", - dsp_config, - par_dsp[lh5_path], - peaks_keV, - np.arange(0, len(peaks_keV), 1).tolist(), - kev_widths, - cut_parameters=dplms_dict["wfs_cut_pars"], - n_events=dplms_dict["n_signals"], - ) - t3 = time.time() - log.info( - f"Time to run event selection {(t3-t2):.2f} s, total events {len(raw_cal)}" - ) - - raw_cal = index_data(raw_cal, idx_list[-1]) log.info(f"Produce dsp data for {len(raw_cal)} events") dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) - t4 = time.time() - log.info(f"Time to run dsp production {(t4-t3):.2f} s") + t3 = time.time() + log.info(f"Time to run dsp production {(t3-t2):.2f} s") - # minimal processing chain - with open(dsp_config) as r: - dsp_config = json.load(r) dsp_config["outputs"] = [ene_par, "dt_eff"] # dictionary for peak fitting @@ -155,7 +132,6 @@ def dplms_ge_dict( if display > 0: plot_dict = {} plot_dict["dplms"] = {} - fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") # penalized coefficients dp_coeffs = dplms_dict["dp_coeffs"] @@ -210,10 +186,6 @@ def dplms_ge_dict( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) - t_tmp = time.time() - dsp_opt = run_one_dsp(raw_bls, dsp_config, db_dict=par_dsp[lh5_path]) - energies = dsp_opt[ene_par].nda - t_tmp = time.time() dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path]) @@ -449,76 +421,6 @@ def dplms_ge_dict( return out_dict -def load_data( - raw_file: list[str], - lh5_path: str, - sel_type: str, - peaks: np.array = [], - n_events: int = 5000, - e_lower_lim: float = 1200, - e_upper_lim: float = 2700, -) -> lgdo.Table: - sto = lh5.LH5Store() - - daqenergy = sto.read_object(f"{lh5_path}/raw/daqenergy", raw_file)[0].nda - - if sel_type == "bls": - cuts = np.where(daqenergy == 0)[0] - idx_list = [] - tb_data = sto.read_object( - f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts - )[0] - return tb_data - else: - pulser_props = find_pulser_properties(df, energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - pulser_e, pulser_err = entry[0], entry[1] - if pulser_err < 10: - pulser_err = 10 - e_cut = (daqenergy < pulser_e + pulser_err) & ( - daqenergy > pulser_e - pulser_err - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = final_mask - log.debug(f"pulser found: {pulser_props}") - else: - log.debug("no pulser") - ids = np.zeros(len(daqenergy), dtype=bool) - - # Get events around peak using raw file values - initial_mask = (daqenergy > 0) & (~ids) - 
rough_energy = daqenergy[initial_mask] - initial_idxs = np.where(initial_mask)[0] - - guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV * 0.6 - Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 # / guess_keV - hist, bins, var = get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) - detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks - ) - log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") - e_lower_lim = (e_lower_lim - roughpars[1]) / roughpars[0] - e_upper_lim = (e_upper_lim - roughpars[1]) / roughpars[0] - log.debug(f"lower_lim: {e_lower_lim}, upper_lim: {e_upper_lim}") - mask = (rough_energy > e_lower_lim) & (rough_energy < e_upper_lim) - cuts = initial_idxs[mask][:] - log.debug(f"{len(cuts)} events found in energy range") - rough_energy = rough_energy[mask] - rough_energy = rough_energy[:n_events] - rough_energy = rough_energy * roughpars[0] + roughpars[1] - tb_data = sto.read_object( - f"{lh5_path}/raw", raw_file, n_rows=n_events, idx=cuts - )[0] - return tb_data, rough_energy - - def is_valid_centroid( centroid: np.array, lim: int, size: int, full_size: int ) -> list[bool]: diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 5da39c84f..0fc12de87 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -939,7 +939,6 @@ def event_selection( else: final_mask = final_mask | e_cut ids = final_mask - print(f"pulser found: {pulser_props}") log.debug(f"pulser found: {pulser_props}") else: log.debug("no_pulser") @@ -952,14 +951,13 @@ def event_selection( initial_idxs = np.where(initial_mask)[0] guess_keV = 2620 / np.nanpercentile(rough_energy, 99) - Euc_min = 0 # threshold / guess_keV + Euc_min = threshold / guess_keV * 0.6 Euc_max = 2620 / guess_keV * 1.1 - dEuc = 1 / guess_keV + dEuc = 1 # / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( hist, bins, var, peaks_keV, n_sigma=3 ) - print(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") log.debug(f"detected {detected_peaks_keV} keV peaks at {detected_peaks_locs}") masks = [] @@ -1073,11 +1071,14 @@ def event_selection( return out_events, idx_list -def fwhm_slope(x, m0, m1): +def fwhm_slope(x, m0, m1, m2=None): """ Fit the energy resolution curve """ - return np.sqrt(m0 + m1 * x) + if m2 is None: + return np.sqrt(m0 + m1 * x) + else: + return np.sqrt(m0 + m1 * x + m2 * (x**2)) def interpolate_energy(peak_energies, points, err_points, energy): From 8811839bba907ea3244d267a34d53dc2d8da904f Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 28 Nov 2023 18:53:40 +0100 Subject: [PATCH 143/191] small changes --- src/pygama/pargen/energy_optimisation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 0fc12de87..fb990dc9e 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -956,7 +956,10 @@ def event_selection( dEuc = 1 # / guess_keV hist, bins, var = pgh.get_hist(rough_energy, range=(Euc_min, Euc_max), dx=dEuc) detected_peaks_locs, detected_peaks_keV, roughpars = pgc.hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=3 + hist, + bins, + var, + np.array([238.632, 583.191, 727.330, 860.564, 1620.5, 2103.53, 2614.553]), ) log.debug(f"detected 
{detected_peaks_keV} keV peaks at {detected_peaks_locs}") From d2b162d0fdb04513113fcf2005ac4688ebe08826 Mon Sep 17 00:00:00 2001 From: valerioda Date: Fri, 12 Jan 2024 10:37:51 +0100 Subject: [PATCH 144/191] update the LH5 file writes/reads to match the new LH5Store syntax according suggestions --- src/pygama/pargen/dplms_ge_dict.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 67caf4ced..0c1f9fcbc 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -13,11 +13,9 @@ import time from collections import OrderedDict -import lgdo -import lgdo.lh5_store as lh5 import matplotlib.pyplot as plt import numpy as np -from lgdo import Array +from lgdo import Array, Table, lh5 from scipy.signal import convolve, convolve2d from pygama.math.histogram import get_hist @@ -42,8 +40,8 @@ def dplms_ge_dict( lh5_path: str, - raw_fft: lgdo.Table, - raw_cal: lgdo.Table, + raw_fft: Table, + raw_cal: Table, dsp_config: dict, par_dsp: dict, par_dsp_lh5: str, @@ -57,19 +55,19 @@ def dplms_ge_dict( Parameters ---------- - lh5_path: str + lh5_path Name of channel to process, should be name of lh5 group in raw files - fft_files : lgdo.Table + fft_files table with fft data - raw_cal : lgdo.Table + raw_cal table with cal data - dsp_config: dict + dsp_config dsp config file - par_dsp: dict + par_dsp Dictionary with db parameters for dsp processing - par_dsp_lh5: str + par_dsp_lh5 Path for saving dplms coefficients - dplms_dict: dict + dplms_dict Dictionary with various parameters Returns @@ -275,7 +273,7 @@ def dplms_ge_dict( wsize, ) - sto.write_object( + sto.write( Array(x), name="dplms", lh5_file=par_dsp_lh5, From 2aa52e2380206cd4da3c3cb28ebf64ee7da99f9f Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 14:44:56 +0100 Subject: [PATCH 145/191] modification for dsp processing --- src/pygama/pargen/dplms_ge_dict.py | 37 +++++++++--------------- src/pygama/pargen/energy_optimisation.py | 10 +++---- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py index 0c1f9fcbc..6a155d239 100644 --- a/src/pygama/pargen/dplms_ge_dict.py +++ b/src/pygama/pargen/dplms_ge_dict.py @@ -8,10 +8,7 @@ import json import logging import os -import pathlib -import pickle import time -from collections import OrderedDict import matplotlib.pyplot as plt import numpy as np @@ -25,14 +22,9 @@ gauss_step_pdf, radford_pdf, ) -from pygama.pargen.cuts import find_pulser_properties, generate_cuts, get_cut_indexes +from pygama.pargen.cuts import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.energy_cal import hpge_find_E_peaks -from pygama.pargen.energy_optimisation import ( - event_selection, - fom_FWHM, - fom_FWHM_with_dt_corr_fit, -) +from pygama.pargen.energy_optimisation import fom_FWHM_with_dt_corr_fit log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -72,11 +64,12 @@ def dplms_ge_dict( Returns ------- - out_dict : dict + out_dict """ t0 = time.time() log.info(f"\nSelecting baselines") + dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path]) cut_dict = generate_cuts(dsp_fft, parameters=dplms_dict["bls_cut_pars"]) idxs = get_cut_indexes(dsp_fft, cut_dict) @@ -133,11 +126,7 @@ def dplms_ge_dict( # penalized coefficients dp_coeffs = dplms_dict["dp_coeffs"] - if lh5_path in dplms_dict["noisy_bl"]: - log.info("Setting 
explicit zero area condition") - za_coeff = dp_coeffs["za"] - else: - za_coeff = dplms_dict["dp_def"]["za"] + za_coeff = dplms_dict["dp_def"]["za"] dp_coeffs.pop("za") coeff_keys = [key for key in dp_coeffs.keys()] lists = [dp_coeffs[key] for key in dp_coeffs.keys()] @@ -177,9 +166,7 @@ def dplms_ge_dict( dplms_dict["length"], wsize, ) - par_dsp[lh5_path]["dplms"] = {} - par_dsp[lh5_path]["dplms"]["length"] = dplms_dict["length"] - par_dsp[lh5_path]["dplms"]["coefficients"] = x.tolist() + par_dsp[lh5_path]["dplms"] = {"length": dplms_dict["length"], "coefficients": x} log.info( f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area", np.sum(x) ) @@ -402,11 +389,11 @@ def dplms_ge_dict( plot_dict["dplms"]["wf_sel"] = fig fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white") - ax.plot(np.flip(x), "r-", label=f"filter") + ax.plot(x, "r-", label=f"filter") ax.axhline(0, color="black", linestyle=":") ax.legend(loc="upper right", title=f"{lh5_path}") axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33]) - axin.plot(np.flip(x), "r-") + axin.plot(x, "r-") axin.set_xlim( dplms_dict["length"] / 2 - dplms_dict["zoom"], dplms_dict["length"] / 2 + dplms_dict["zoom"], @@ -560,14 +547,18 @@ def filter_synthesis( fmat: np.array, length: int, size: int, + flip: bool = True, ) -> np.array: mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat flo = (size // 2) - (length // 2) fhi = (size // 2) + (length // 2) - x = np.linalg.solve(mat, ref[flo:fhi]) + x = np.linalg.solve(mat, ref[flo:fhi]).astype(np.float32) y = convolve(ref, np.flip(x), mode="valid") maxy = np.max(y) x /= maxy y /= maxy refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)] - return x, y, refy + if flip: + return np.flip(x), y, refy + else: + return x, y, refy diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index fb990dc9e..e84c93f36 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,12 +13,12 @@ import sys from collections import namedtuple -import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util +from lgdo import Array, Table, WaveformTable, lh5 from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize @@ -892,14 +892,14 @@ def get_wf_indexes(sorted_indexs, n_events): def index_data(data, indexes, wf_field="waveform"): - new_baselines = lh5.Array(data["baseline"].nda[indexes]) + new_baselines = Array(data["baseline"].nda[indexes]) new_waveform_values = data[wf_field]["values"].nda[indexes] new_waveform_dts = data[wf_field]["dt"].nda[indexes] new_waveform_t0 = data[wf_field]["t0"].nda[indexes] - new_waveform = lh5.WaveformTable( + new_waveform = WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) + new_data = Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) return new_data @@ -1068,7 +1068,7 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.array(out_events).flatten()) + out_events = np.unique(np.concatenate(out_events)) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, 
[len(mask) for mask in final_events]) return out_events, idx_list From e255e1ec327d3f1231386840f92d28c64b244963 Mon Sep 17 00:00:00 2001 From: valerioda Date: Tue, 16 Jan 2024 15:24:55 +0100 Subject: [PATCH 146/191] revert modification on ene_opt --- src/pygama/pargen/energy_optimisation.py | 57 +++++++++++------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index e84c93f36..ecad4bbd7 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -13,12 +13,12 @@ import sys from collections import namedtuple +import lgdo.lh5 as lh5 import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from iminuit import Minuit, cost, util -from lgdo import Array, Table, WaveformTable, lh5 from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize @@ -857,7 +857,6 @@ def fom_FWHM_fit(tb_in, kwarg_dict): csqr, n_sig, n_sig_err, - _, ) = get_peak_fwhm_with_dt_corr( Energies, alpha, dt, func, gof_func, peak=peak, kev_width=kev_width, kev=True ) @@ -892,14 +891,14 @@ def get_wf_indexes(sorted_indexs, n_events): def index_data(data, indexes, wf_field="waveform"): - new_baselines = Array(data["baseline"].nda[indexes]) + new_baselines = lh5.Array(data["baseline"].nda[indexes]) new_waveform_values = data[wf_field]["values"].nda[indexes] new_waveform_dts = data[wf_field]["dt"].nda[indexes] new_waveform_t0 = data[wf_field]["t0"].nda[indexes] - new_waveform = WaveformTable( + new_waveform = lh5.WaveformTable( None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values ) - new_data = Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) + new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines}) return new_data @@ -1068,20 +1067,17 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.concatenate(out_events)) + out_events = np.unique(np.array(out_events).flatten()) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) return out_events, idx_list -def fwhm_slope(x, m0, m1, m2=None): +def fwhm_slope(x, m0, m1, m2): """ Fit the energy resolution curve """ - if m2 is None: - return np.sqrt(m0 + m1 * x) - else: - return np.sqrt(m0 + m1 * x + m2 * (x**2)) + return np.sqrt(m0 + m1 * x + m2 * (x**2)) def interpolate_energy(peak_energies, points, err_points, energy): @@ -1089,7 +1085,7 @@ def interpolate_energy(peak_energies, points, err_points, energy): if len(points[~nan_mask]) < 3: return np.nan, np.nan, np.nan else: - param_guess = [2, 0.001] + param_guess = [2, 0.001, 0.000001] # # param_bounds = (0, [10., 1. 
])# try: fit_pars, fit_covs = curve_fit( @@ -1139,11 +1135,6 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_0_est"].nda, dtype="float64") elif ctc_parameter == "rt": dt = np.subtract(tb_in["tp_99"].nda, tb_in["tp_01"].nda, dtype="float64") - - if idxs is not None: - Energies = Energies[idxs] - dt = dt[idxs] - if np.isnan(Energies).any() or np.isnan(dt).any(): if np.isnan(Energies).any(): log.debug(f"nan energy values for peak {peak}") @@ -1158,6 +1149,10 @@ def fom_FWHM(tb_in, kwarg_dict, ctc_parameter, alpha, idxs=None, display=0): "n_sig_err": np.nan, } + if idxs is not None: + Energies = Energies[idxs] + dt = dt[idxs] + # Return fwhm of optimal alpha in kev with error try: ( @@ -1210,37 +1205,39 @@ def single_peak_fom(data, kwarg_dict): return out_dict -def new_fom(data, kwarg_dict, alpha=None): +def new_fom(data, kwarg_dict): peaks = kwarg_dict["peaks_keV"] idx_list = kwarg_dict["idx_list"] ctc_param = kwarg_dict["ctc_param"] peak_dicts = kwarg_dict["peak_dicts"] - if alpha is None: - out_dict = fom_FWHM_with_dt_corr_fit( - data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 - ) - alpha = out_dict["alpha"] - + out_dict = fom_FWHM_with_dt_corr_fit( + data, peak_dicts[-1], ctc_param, idxs=idx_list[-1], display=0 + ) + alpha = out_dict["alpha"] log.info(alpha) fwhms = [] fwhm_errs = [] n_sig = [] n_sig_err = [] - chisquares = [] - for i, peak in enumerate(peaks): + for i, peak in enumerate(peaks[:-1]): out_peak_dict = fom_FWHM( data, peak_dicts[i], ctc_param, alpha, idxs=idx_list[i], display=0 ) + # n_sig_minimum = peak_dicts[i]["n_sig_minimum"] + # if peak_dict["n_sig"] Date: Tue, 16 Jan 2024 17:15:59 +0100 Subject: [PATCH 147/191] change load data --- src/pygama/pargen/utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index e58785e4e..5c8f8c101 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -3,10 +3,10 @@ import logging from types import FunctionType -import lgdo.lh5_store as lh5 import numpy as np import pandas as pd from iminuit import Minuit, cost, util +from lgdo import Table, lh5 log = logging.getLogger(__name__) @@ -70,15 +70,20 @@ def load_data( masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read(lh5_path, tfiles)[0] + file_df = pd.DataFrame(columns=params) if tstamp in cal_dict: - file_df = table.eval(cal_dict[tstamp]).get_dataframe() + cal_dict_ts = cal_dict[tstamp] else: - file_df = table.eval(cal_dict).get_dataframe() + cal_dict_ts = cal_dict + for param in params: + if param in cal_dict_ts: + expression = cal_dict_ts[param]["expression"] + parameters = cal_dict_ts[param].get("parameters", None) + file_df[param] = table.eval(expression, parameters) + else: + file_df[param] = table[param] file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("run_timestamp") - for param in params: - if param not in file_df: - file_df[param] = lh5.load_nda(tfiles, [param], lh5_path)[param] if threshold is not None: mask = file_df[cal_energy_param] > threshold file_df.drop(np.where(~mask)[0], inplace=True) @@ -96,10 +101,14 @@ def load_data( params = get_params(keys + list(cal_dict.keys()), params) table = sto.read(lh5_path, files)[0] - df = table.eval(cal_dict).get_dataframe() + df = pd.DataFrame(columns=params) for param in params: - if param not in df: - df[param] = lh5.load_nda(files, [param], 
lh5_path)[param] + if param in cal_dict: + expression = cal_dict[param]["expression"] + parameters = cal_dict[param].get("parameters", None) + df[param] = table.eval(expression, parameters) + else: + df[param] = table[param] if threshold is not None: masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) From 5523ac72107b01e74734f088e87bf67724c5dc38 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 19 Jan 2024 02:45:36 +0100 Subject: [PATCH 148/191] added lh5 output format for skm tier --- src/pygama/skm/build_skm.py | 87 +++++++++++++++---------- tests/skm/configs/basic-skm-config.json | 3 +- tests/skm/test_build_skm.py | 87 +++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 37 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index aed71e1eb..5ed4166f7 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -12,12 +12,11 @@ import awkward as ak import numpy as np import pandas as pd -from lgdo import Array, lh5 +from lgdo import Array, lh5, Table from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) - def build_skm( f_evt: str, f_hit: str, @@ -27,6 +26,7 @@ def build_skm( skm_conf: dict | str, wo_mode="w", skim_format: str = "parquet", + group: str = "/skm/", ) -> None: """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). @@ -94,7 +94,9 @@ def build_skm( - ``overwrite`` or ``o``: replaces existing file. skim_format - data format of the skimmed output (``hdf`` or ``parquet``). + data format of the skimmed output (``hdf``, ``lh5`` or ``parquet``). + group + LH5 root group name (only used if ``skim_format`` is ``lh5``). """ f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} log = logging.getLogger(__name__) @@ -113,8 +115,8 @@ def build_skm( multi = int(tbl_cfg["multiplicity"]) store = LH5Store() - df = pd.DataFrame() - + # df = pd.DataFrame() + table = Table() if "operations" in tbl_cfg.keys(): for op in tbl_cfg["operations"].keys(): miss_val = np.nan @@ -194,46 +196,59 @@ def build_skm( obj = ak.pad_none(obj, multi, clip=True) obj = ak.to_numpy(ak.fill_none(obj, miss_val)) - nms = [op] if obj.ndim > 1: if "postfixes" in tbl_cfg.keys(): nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]] else: nms = [f"{op}_{x}" for x in range(multi)] - - df = df.join(pd.DataFrame(data=obj, columns=nms), how="outer") - - # Set an index column if specified - if "index_field" in tbl_cfg.keys(): - log.debug("Setting index") - if tbl_cfg["index_field"] in df.keys(): - df = df.set_index(tbl_cfg["index_field"]) - else: - raise ValueError( - "index field not found. Needs to be a previously defined skm field" - ) + + for i in range(len(nms)): + # add attribute if present + ob = Array(nda=obj[:,i]) + if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): + ob.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] + table.add_field(nms[i], ob,True) + else: + obj = Array(nda=obj) + if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): + obj.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] + table.add_field(op, obj,True) # last thing missing is writing it out log.debug("saving skm file") - if skim_format not in ["parquet", "hdf"]: - raise ValueError("Not supported skim data format. 
Operations are hdf, parquet") - if wo_mode in ["w", "write_safe"]: - if os.path.exists(f_skm): - raise FileExistsError(f"Write_safe mode: {f_skm} exists.") - else: - if "hdf" == skim_format: - df.to_hdf(f_skm, key="df", mode="w") - elif "parquet" == skim_format: - df.to_parquet(f_skm) - elif wo_mode in ["o", "overwrite"]: - if "hdf" == skim_format: - df.to_hdf(f_skm, key="df", mode="w") - elif "parquet" == skim_format: - df.to_parquet(f_skm) - elif wo_mode in ["a", "append"]: + if skim_format not in ["parquet", "hdf","lh5"]: + raise ValueError("Not supported skim data format. Operations are hdf, lh5, parquet") + + if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm): + raise FileExistsError(f"Write_safe mode: {f_skm} exists.") + + if skim_format in ["hdf","parquet"]: + df = table.view_as("pd") + # Set an index column if specified + if "index_field" in tbl_cfg.keys(): + log.debug("Setting index") + if tbl_cfg["index_field"] in df.keys(): + df = df.set_index(tbl_cfg["index_field"]) + else: + raise ValueError( + "index field not found. Needs to be a previously defined skm field" + ) + if "hdf" == skim_format: - df.to_hdf(f_skm, key="df", mode="a") + if wo_mode in ["w", "write_safe","o", "overwrite"]: + df.to_hdf(f_skm, key="df", mode="w") + elif wo_mode in ["a", "append"]: + df.to_hdf(f_skm, key="df", mode="a") + elif "parquet" == skim_format: - df.to_parquet(f_skm, append=True) + if wo_mode in ["w", "write_safe","o", "overwrite"]: + df.to_parquet(f_skm) + elif wo_mode in ["a", "append"]: + df.to_parquet(f_skm, append=True) + + elif "lh5" == skim_format: + wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" + store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) + else: raise ValueError(f"wo_mode {wo_mode} not valid.") diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index faf5e56cb..8e57660cd 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -3,7 +3,8 @@ "index_field": "timestamp", "operations": { "timestamp": { - "forward_field": "evt.timestamp" + "forward_field": "evt.timestamp", + "lgdo_attrs": {"info":"pk was here"} }, "energy_sum": { "forward_field": "evt.energy_sum" diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 678fe2c41..45eaad4d9 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -74,3 +74,90 @@ def test_basics(lgnd_test_data, tmptestdir): assert (vov_eid[:, 0] == df.energy_id_0.to_numpy()).all() assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all() assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() + +def test_df_to_table_conversion(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + + skm_conf = f"{config_dir}/basic-skm-config.json" + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" + skm_out2 = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + build_skm( + outfile, + 
lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out, + skm_conf, + wo_mode="o", + skim_format="hdf", + ) + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out2, + skm_conf, + wo_mode="o", + skim_format="lh5", + ) + + assert os.path.exists(skm_out) + assert os.path.exists(skm_out2) + df = pd.read_hdf(skm_out) + tbl = store.read("/skm/",skm_out2)[0].view_as("pd") + assert isinstance(tbl,pd.DataFrame) + assert df.reset_index().equals(tbl) + +def test_attribute_passing(lgnd_test_data, tmptestdir): + outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" + tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" + if os.path.exists(outfile): + os.remove(outfile) + build_evt( + f_tcm=lgnd_test_data.get_path(tcm_path), + f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + f_evt=outfile, + evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + wo_mode="o", + group="/evt/", + tcm_group="hardware_tcm_1", + ) + + skm_conf = f"{config_dir}/basic-skm-config.json" + + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + + build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_out, + skm_conf, + wo_mode="o", + skim_format="lh5", + ) + + assert os.path.exists(skm_out) + assert "info" in store.read("/skm/timestamp", skm_out)[0].getattrs().keys() + assert ( + store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] + == "pk was here" + ) + + From b6bd4e60c80c70283baad8108798a604277c3200 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 19 Jan 2024 12:41:51 +0100 Subject: [PATCH 149/191] full sparse mode compatibility --- src/pygama/evt/build_evt.py | 65 +++++++++------------ src/pygama/evt/modules/spm.py | 75 ++++++++++++------------- src/pygama/skm/build_skm.py | 34 +++++------ tests/skm/configs/basic-skm-config.json | 2 +- tests/skm/test_build_skm.py | 15 ++--- 5 files changed, 88 insertions(+), 103 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index c39ddffb4..6db40d5d8 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -317,7 +317,6 @@ def get_data_at_channel( is_evaluated: bool, f_hit: str, f_dsp: str, - outsize: int, defv, ) -> np.ndarray: """Evaluates an expression and returns the result. @@ -343,14 +342,13 @@ def get_data_at_channel( path to `hit` tier file. f_dsp path to `dsp` tier file. - outsize - size of the return array. defv default value. """ # get index list for this channel to be loaded idx_ch = idx[ids == int(ch[2:])] + outsize = len(idx_ch) if not is_evaluated: res = np.full(outsize, defv, dtype=type(defv)) @@ -393,8 +391,7 @@ def get_mask_from_query( qry: str | NDArray, length: int, ch: str, - ids: NDArray, - idx: NDArray, + idx_ch: NDArray, f_hit: str, f_dsp: str, ) -> np.ndarray: @@ -408,17 +405,13 @@ def get_mask_from_query( length of the return mask. ch "rawid" of channel to be evaluated. - idx - `tcm` index array. - ids - `tcm` id array. + idx_ch + channel indices to be read. f_hit path to `hit` tier file. f_dsp path to `dsp` tier file. 
""" - # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] # get sub evt based query condition if needed if isinstance(qry, str): @@ -523,12 +516,11 @@ def evaluate_to_first_or_last( ch not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # find if sorter is in hit or dsp t0 = store.read( @@ -618,12 +610,11 @@ def evaluate_to_scalar( ch not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) # switch through modes if "sum" == mode: @@ -686,7 +677,7 @@ def evaluate_at_channel( # skip default value if f"ch{ch}" not in lh5.ls(f_hit): continue - + idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", ids, @@ -697,11 +688,10 @@ def evaluate_at_channel( f"ch{ch}" not in chns_rm, f_hit, f_dsp, - len(out), defv, ) - out = np.where(ch == ch_comp.nda, res, out) + out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) return Array(nda=out) @@ -746,12 +736,14 @@ def evaluate_at_channel_vov( """ # blow up vov to aoesa - out = ch_comp.to_aoesa().view_as("np") + out = ak.Array([[] for x in range(len(ch_comp))]) - chns = np.unique(out[~np.isnan(out)]).astype(int) + chns = np.unique(ch_comp.flattened_data.nda).astype(int) + ch_comp = ch_comp.view_as("ak") type_name = None for ch in chns: + idx_ch = idx[ids == ch] res = get_data_at_channel( f"ch{ch}", ids, @@ -762,23 +754,22 @@ def evaluate_at_channel_vov( f"ch{ch}" not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # see in which events the current channel is present - mask = (out == ch).any(axis=1) - out[out == ch] = res[mask] + mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False) + cv = np.full(len(ch_comp), np.nan) + cv[idx_ch] = res + cv[~mask] = np.nan + cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None])) + + out = ak.concatenate((out, cv), axis=-1) if ch == chns[0]: type_name = res.dtype - # ok now implode the table again - out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type_name), - cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), - ) - return out + return VectorOfVectors(ak.values_astype(out, type_name)) def evaluate_to_aoesa( @@ -837,6 +828,7 @@ def evaluate_to_aoesa( i = 0 for ch in chns: + idx_ch = idx[ids == int(ch[2:])] res = get_data_at_channel( ch, ids, @@ -847,15 +839,13 @@ def evaluate_to_aoesa( ch not in chns_rm, f_hit, f_dsp, - len(out), defv, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, ids, idx, f_hit, f_dsp) + limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) - # append to out according to mode == vov - out[:, i][limarr] = res[limarr] + out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) i += 1 @@ -954,13 +944,10 @@ def evaluate_to_vector( "sorter values can only have 'ascend_by' or 'descend_by' prefixes" ) - out = VectorOfVectors( - flattened_data=out.flatten()[~np.isnan(out.flatten())].astype(type(defv)), - cumulative_length=np.cumsum(np.count_nonzero(~np.isnan(out), axis=1)), + return VectorOfVectors( + ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)) ) - return out - def build_evt( f_tcm: str, @@ -1108,7 +1095,7 @@ def build_evt( table = Table(size=nrows) for k, v in tbl_cfg["operations"].items(): - 
log.debug("Processing field" + k) + log.debug("Processing field " + k) # if mode not defined in operation, it can only be an operation on the evt level. if "aggregation_mode" not in v.keys(): diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 96a1098b6..a4020548c 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -88,18 +88,16 @@ def get_masked_tcm_idx( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) + tmp[idx_ch] = pe + pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) + tmp[idx_ch] = times + times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) @@ -107,16 +105,20 @@ def get_masked_tcm_idx( out_idx = ak.local_index(mask)[mask] elif mode == 1: - out_idx = ak.Array(np.where(ids == int(ch[2:]))[0]) - out_idx = out_idx[:, None][mask[mask] - 1] + out_idx = np.full((np.max(idx) + 1), np.nan) + out_idx[idx_ch] = np.where(ids == int(ch[2:]))[0] + out_idx = ak.drop_none(ak.nan_to_none(ak.Array(out_idx)[:, None])) + out_idx = out_idx[mask[mask] - 1] elif mode == 2: out_idx = ak.Array([int(ch[2:])] * len(mask)) out_idx = out_idx[:, None][mask[mask] - 1] elif mode == 3: - out_idx = ak.Array(idx_ch) - out_idx = out_idx[:, None][mask[mask] - 1] + out_idx = np.full((np.max(idx) + 1), np.nan) + out_idx[idx_ch] = idx_ch + out_idx = ak.drop_none(ak.nan_to_none(ak.Array(out_idx)[:, None])) + out_idx = out_idx[mask[mask] - 1] else: raise ValueError("Unknown mode") @@ -180,7 +182,7 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) ) ) - mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) + mask = get_spm_mask(lim, tge[idx_ch], tmin, tmax, pe, times) pe = pe[mask] if mode in ["energy_hc", "energy_dplms"]: @@ -265,18 +267,16 @@ def get_etc( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) + tmp[idx_ch] = pe + pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) + tmp[idx_ch] = times + times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) @@ -315,7 +315,7 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> # load TCM data to define an event ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") - time_lst = [] + time_all = ak.Array([[] for x in range(np.max(idx) + 1)]) if isinstance(trgr, (float, int)): tge = cast_trigger(trgr, 
tdefault, length=np.max(idx) + 1) @@ -325,25 +325,24 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) + tmp[idx_ch] = pe + pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = ak.drop_none( - ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("ak") - ) - ) + times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) + tmp[idx_ch] = times + times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) # apply mask and convert sample units to ns - time_lst.append(times[mask] * 16) + times = times[mask] * 16 + + time_all = ak.concatenate((time_all, times), axis=-1) - time_all = ak.concatenate(time_lst, axis=-1) out = ak.min(time_all, axis=-1) # Convert to 1D numpy array diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 5ed4166f7..0a2965493 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -11,12 +11,12 @@ import awkward as ak import numpy as np -import pandas as pd -from lgdo import Array, lh5, Table +from lgdo import Array, Table, lh5 from lgdo.lh5 import LH5Store log = logging.getLogger(__name__) + def build_skm( f_evt: str, f_hit: str, @@ -201,28 +201,30 @@ def build_skm( nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]] else: nms = [f"{op}_{x}" for x in range(multi)] - + for i in range(len(nms)): # add attribute if present - ob = Array(nda=obj[:,i]) + ob = Array(nda=obj[:, i]) if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): ob.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] - table.add_field(nms[i], ob,True) + table.add_field(nms[i], ob, True) else: obj = Array(nda=obj) if "lgdo_attrs" in tbl_cfg["operations"][op].keys(): obj.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] - table.add_field(op, obj,True) + table.add_field(op, obj, True) # last thing missing is writing it out log.debug("saving skm file") - if skim_format not in ["parquet", "hdf","lh5"]: - raise ValueError("Not supported skim data format. Operations are hdf, lh5, parquet") - + if skim_format not in ["parquet", "hdf", "lh5"]: + raise ValueError( + "Not supported skim data format. Operations are hdf, lh5, parquet" + ) + if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm): raise FileExistsError(f"Write_safe mode: {f_skm} exists.") - - if skim_format in ["hdf","parquet"]: + + if skim_format in ["hdf", "parquet"]: df = table.view_as("pd") # Set an index column if specified if "index_field" in tbl_cfg.keys(): @@ -233,22 +235,22 @@ def build_skm( raise ValueError( "index field not found. 
Needs to be a previously defined skm field" ) - + if "hdf" == skim_format: - if wo_mode in ["w", "write_safe","o", "overwrite"]: + if wo_mode in ["w", "write_safe", "o", "overwrite"]: df.to_hdf(f_skm, key="df", mode="w") elif wo_mode in ["a", "append"]: df.to_hdf(f_skm, key="df", mode="a") elif "parquet" == skim_format: - if wo_mode in ["w", "write_safe","o", "overwrite"]: + if wo_mode in ["w", "write_safe", "o", "overwrite"]: df.to_parquet(f_skm) elif wo_mode in ["a", "append"]: df.to_parquet(f_skm, append=True) - + elif "lh5" == skim_format: wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) - + else: raise ValueError(f"wo_mode {wo_mode} not valid.") diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index 8e57660cd..feb29bc17 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -4,7 +4,7 @@ "operations": { "timestamp": { "forward_field": "evt.timestamp", - "lgdo_attrs": {"info":"pk was here"} + "lgdo_attrs": { "info": "pk was here" } }, "energy_sum": { "forward_field": "evt.energy_sum" diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 45eaad4d9..56499ba94 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -75,6 +75,7 @@ def test_basics(lgnd_test_data, tmptestdir): assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all() assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() + def test_df_to_table_conversion(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" @@ -118,10 +119,11 @@ def test_df_to_table_conversion(lgnd_test_data, tmptestdir): assert os.path.exists(skm_out) assert os.path.exists(skm_out2) df = pd.read_hdf(skm_out) - tbl = store.read("/skm/",skm_out2)[0].view_as("pd") - assert isinstance(tbl,pd.DataFrame) + tbl = store.read("/skm/", skm_out2)[0].view_as("pd") + assert isinstance(tbl, pd.DataFrame) assert df.reset_index().equals(tbl) + def test_attribute_passing(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" @@ -141,7 +143,7 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): skm_conf = f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" - + build_skm( outfile, lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), @@ -155,9 +157,4 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): assert os.path.exists(skm_out) assert "info" in store.read("/skm/timestamp", skm_out)[0].getattrs().keys() - assert ( - store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] - == "pk was here" - ) - - + assert store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] == "pk was here" From b4672eea161973ddeff70bbbf92d8c3131b20133 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 19 Jan 2024 19:33:12 +0100 Subject: [PATCH 150/191] Friday evening changes --- src/pygama/evt/build_evt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 6db40d5d8..039cc756f 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -935,10 +935,10 @@ def evaluate_to_vector( nrows, 
).view_as("np") if "ascend_by" == md: - out[np.arange(len(out))[:, None], np.argsort(s_val)] + out = out[np.arange(len(out))[:, None], np.argsort(s_val)] elif "descend_by" == md: - out[np.arange(len(out))[:, None], np.argsort(-s_val)] + out = out[np.arange(len(out))[:, None], np.argsort(-s_val)] else: raise ValueError( "sorter values can only have 'ascend_by' or 'descend_by' prefixes" From b5240f4a558b8642575aeb0b53478d94b0aa3303 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 22 Jan 2024 18:39:13 +0100 Subject: [PATCH 151/191] allow passing of env vars in legend meta module --- src/pygama/evt/modules/legend.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py index f2f8137ef..2ee2d7e8e 100644 --- a/src/pygama/evt/modules/legend.py +++ b/src/pygama/evt/modules/legend.py @@ -3,13 +3,15 @@ """ from importlib import import_module +from lgdo.lh5 import utils + def metadata(params: dict) -> list: # only import legend meta data when needed. # LEGEND collaborators can use the meta keyword # While for users w/o access to the LEGEND meta data this is still working lm = import_module("legendmeta") - lmeta = lm.LegendMetadata(path=params["meta_path"]) + lmeta = lm.LegendMetadata(path=utils.expand_path(params["meta_path"])) chmap = lmeta.channelmap(params["time_key"]) tmp = [ From a7856e5867e48678063cd5c82d1ee16929854375 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 11:40:21 +0100 Subject: [PATCH 152/191] add pyarrow dependence --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 6582215a1..74c036924 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,6 +42,7 @@ install_requires = numpy>=1.21 pandas>=1.4.4 pint + pyarrow scikit-learn scipy>=1.0.1 tables From ccc1b71b31269bf3464430ef9c6329c65c9de06f Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 12:17:09 +0100 Subject: [PATCH 153/191] error parameter deprecation in pandas 2.2 to_numeric function --- src/pygama/flow/file_db.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 4047f8c97..66545c419 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -272,7 +272,10 @@ def scan_files(self, dirs: list[str] = None) -> None: # convert cols to numeric dtypes where possible for col in self.df.columns: - self.df[col] = pd.to_numeric(self.df[col], errors="ignore") + try: + self.df[col] = pd.to_numeric(self.df[col]) + except ValueError: + continue # sort rows according to timestamps utils.inplace_sort(self.df, self.sortby) @@ -669,7 +672,10 @@ def scan_daq_files(self, daq_dir: str, daq_template: str) -> None: # convert cols to numeric dtypes where possible for col in self.df.columns: - self.df[col] = pd.to_numeric(self.df[col], errors="ignore") + try: + self.df[col] = pd.to_numeric(self.df[col]) + except ValueError: + continue def get_table_name(self, tier: str, tb: str) -> str: """Get the table name for a tier given its table identifier. 
From 56ec1d4089ac1428f1baff0d9078973b76ce2196 Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Tue, 23 Jan 2024 14:41:30 +0100
Subject: [PATCH 154/191] removed output format options other than lh5 from build_skm

---
 src/pygama/skm/build_skm.py             | 47 +++++-----------------
 tests/skm/configs/basic-skm-config.json |  1 -
 tests/skm/test_build_skm.py             | 57 ++-----------------------
 3 files changed, 8 insertions(+), 97 deletions(-)

diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py
index 0a2965493..049012985 100644
--- a/src/pygama/skm/build_skm.py
+++ b/src/pygama/skm/build_skm.py
@@ -25,7 +25,6 @@ def build_skm(
     f_skm: str,
     skm_conf: dict | str,
     wo_mode="w",
-    skim_format: str = "parquet",
     group: str = "/skm/",
 ) -> None:
     """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s).
@@ -50,8 +49,6 @@ def build_skm(
           - ``postfixes`` list of postfixes must be list of
             ``len(multiplicity)``. If not given, numbers from 0 to
             ``multiplicity -1`` are used
-          - ``index_field`` sets the index of the output table. If not given
-            the index are set es increasing integers.
           - ``operations`` are forwarded from lower tiers and clipped/padded
             according to ``missing_value`` if needed. If the forwarded field
             is not an evt tier, ``tcm_idx`` must be passed that specifies the
@@ -64,7 +61,6 @@ def build_skm(
            {
              "multiplicity": 2,
              "postfixes":["","aux"],
-             "index_field": "timestamp",
              "operations": {
                "timestamp":{
                  "forward_field": "evt.timestamp"
@@ -93,14 +89,12 @@ def build_skm(
         - ``append`` or ``a``: append to file.
         - ``overwrite`` or ``o``: replaces existing file.
 
-    skim_format
-        data format of the skimmed output (``hdf``, ``lh5`` or ``parquet``).
     group
         LH5 root group name (only used if ``skim_format`` is ``lh5``).
     """
     f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm}
     log = logging.getLogger(__name__)
-    log.debug(f"I am skimning {len(f_evt) if isinstance(f_evt,list) else 1} files")
+    log.debug(f"I am skimming {len(f_evt) if isinstance(f_evt,list) else 1} files")
 
     tbl_cfg = skm_conf
     if not isinstance(tbl_cfg, (str, dict)):
@@ -215,42 +209,11 @@ def build_skm(
 
     # last thing missing is writing it out
+    if wo_mode not in ["w", "write_safe", "o", "overwrite", "a", "append"]:
+        raise ValueError(f"wo_mode {wo_mode} not valid.")
     log.debug("saving skm file")
-    if skim_format not in ["parquet", "hdf", "lh5"]:
-        raise ValueError(
-            "Not supported skim data format. Operations are hdf, lh5, parquet"
-        )
-
     if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm):
         raise FileExistsError(f"Write_safe mode: {f_skm} exists.")
 
-    if skim_format in ["hdf", "parquet"]:
-        df = table.view_as("pd")
-        # Set an index column if specified
-        if "index_field" in tbl_cfg.keys():
-            log.debug("Setting index")
-            if tbl_cfg["index_field"] in df.keys():
-                df = df.set_index(tbl_cfg["index_field"])
-            else:
-                raise ValueError(
-                    "index field not found. 
Needs to be a previously defined skm field" - ) - - if "hdf" == skim_format: - if wo_mode in ["w", "write_safe", "o", "overwrite"]: - df.to_hdf(f_skm, key="df", mode="w") - elif wo_mode in ["a", "append"]: - df.to_hdf(f_skm, key="df", mode="a") - - elif "parquet" == skim_format: - if wo_mode in ["w", "write_safe", "o", "overwrite"]: - df.to_parquet(f_skm) - elif wo_mode in ["a", "append"]: - df.to_parquet(f_skm, append=True) - - elif "lh5" == skim_format: - wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" - store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) - - else: - raise ValueError(f"wo_mode {wo_mode} not valid.") + wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" + store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index feb29bc17..8037b21bf 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -1,6 +1,5 @@ { "multiplicity": 3, - "index_field": "timestamp", "operations": { "timestamp": { "forward_field": "evt.timestamp", diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 56499ba94..6957e3333 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -3,7 +3,6 @@ import awkward as ak import numpy as np -import pandas as pd from lgdo.lh5 import LH5Store from pygama.evt import build_evt @@ -31,7 +30,7 @@ def test_basics(lgnd_test_data, tmptestdir): ) skm_conf = f"{config_dir}/basic-skm-config.json" - skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" + skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" build_skm( outfile, lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), @@ -40,12 +39,11 @@ def test_basics(lgnd_test_data, tmptestdir): skm_out, skm_conf, wo_mode="o", - skim_format="hdf", ) assert os.path.exists(skm_out) - df = pd.read_hdf(skm_out) - assert df.index.name == "timestamp" + df = store.read("/skm/", skm_out)[0].view_as("pd") + assert "timestamp" in df.keys() assert "energy_0" in df.keys() assert "energy_1" in df.keys() assert "energy_2" in df.keys() @@ -76,54 +74,6 @@ def test_basics(lgnd_test_data, tmptestdir): assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all() -def test_df_to_table_conversion(lgnd_test_data, tmptestdir): - outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" - tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" - if os.path.exists(outfile): - os.remove(outfile) - build_evt( - f_tcm=lgnd_test_data.get_path(tcm_path), - f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), - f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, - evt_config=f"{evt_config_dir}/vov-test-evt-config.json", - wo_mode="o", - group="/evt/", - tcm_group="hardware_tcm_1", - ) - - skm_conf = f"{config_dir}/basic-skm-config.json" - skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.parquet" - skm_out2 = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" - build_skm( - outfile, - lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), - lgnd_test_data.get_path(tcm_path), - skm_out, - skm_conf, - wo_mode="o", - skim_format="hdf", - ) - build_skm( - outfile, - lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), - 
lgnd_test_data.get_path(tcm_path), - skm_out2, - skm_conf, - wo_mode="o", - skim_format="lh5", - ) - - assert os.path.exists(skm_out) - assert os.path.exists(skm_out2) - df = pd.read_hdf(skm_out) - tbl = store.read("/skm/", skm_out2)[0].view_as("pd") - assert isinstance(tbl, pd.DataFrame) - assert df.reset_index().equals(tbl) - - def test_attribute_passing(lgnd_test_data, tmptestdir): outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5" tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" @@ -152,7 +102,6 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): skm_out, skm_conf, wo_mode="o", - skim_format="lh5", ) assert os.path.exists(skm_out) From a8c8393e921ecdb8a3fe0b516b7173e91723fbde Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 16:58:26 +0100 Subject: [PATCH 155/191] spm module cleanup --- src/pygama/evt/modules/spm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index a4020548c..c0df03470 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -9,7 +9,6 @@ additional parameters are free to the user and need to be defined in the JSON """ -import warnings import awkward as ak import numpy as np @@ -247,11 +246,6 @@ def get_etc( trail, min_first_pls_ene, ) -> Array: - # ignore stupid numpy warnings - warnings.filterwarnings("ignore", r"All-NaN slice encountered") - warnings.filterwarnings("ignore", r"invalid value encountered in true_divide") - warnings.filterwarnings("ignore", r"invalid value encountered in divide") - # load TCM data to define an event store = LH5Store() ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") From 18df7b96201266a631639efbe69796de5eadfcc2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 23 Jan 2024 18:54:36 +0100 Subject: [PATCH 156/191] making channel table names agnostic --- src/pygama/evt/build_evt.py | 111 ++++++++++++++++++++++++++++++++---- tests/evt/test_build_evt.py | 24 ++++++++ 2 files changed, 124 insertions(+), 11 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 039cc756f..d13fb49af 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -19,6 +19,23 @@ log = logging.getLogger(__name__) +def get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int: + pre = tcm_id_table_pattern.split("{")[0] + post = tcm_id_table_pattern.split("}")[1] + return int(ch.strip(pre).strip(post)) + + +def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str: + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0] + if pattern_check == "" or ":" == pattern_check[0]: + return tcm_id_table_pattern.format(ch_id) + else: + raise NotImplementedError( + "Only empty placeholders with format specifications are currently implemented" + ) + + def num_and_pars(value: str, par_dic: dict): # function tries to convert a string to a int, float, bool # or returns the value if value is a key in par_dic @@ -51,6 +68,7 @@ def evaluate_expression( qry: str = None, defv: bool | int | float = np.nan, sorter: str = None, + tcm_id_table_pattern: str = "ch{}", ) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: """Evaluates the expression defined by the user across all channels according to the mode. 
@@ -107,6 +125,9 @@ def evaluate_expression( sorter can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`). + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ store = LH5Store() @@ -186,6 +207,7 @@ def evaluate_expression( ch_comp, var_ph, defv, + tcm_id_table_pattern, ) elif isinstance(ch_comp, VectorOfVectors): return evaluate_at_channel_vov( @@ -199,6 +221,7 @@ def evaluate_expression( chns_rm, var_ph, defv, + tcm_id_table_pattern, ) else: raise NotImplementedError( @@ -226,6 +249,7 @@ def evaluate_expression( var_ph, defv, is_first=True if "first_at:" in mode else False, + tcm_id_table_pattern=tcm_id_table_pattern, ) elif mode in ["sum", "any", "all"]: return evaluate_to_scalar( @@ -242,6 +266,7 @@ def evaluate_expression( nrows, var_ph, defv, + tcm_id_table_pattern, ) elif "gather" == mode: return evaluate_to_vector( @@ -258,6 +283,7 @@ def evaluate_expression( var_ph, defv, sorter, + tcm_id_table_pattern, ) else: raise ValueError(mode + " not a valid mode") @@ -318,6 +344,7 @@ def get_data_at_channel( f_hit: str, f_dsp: str, defv, + tcm_id_table_pattern: str = "ch{}", ) -> np.ndarray: """Evaluates an expression and returns the result. @@ -344,18 +371,23 @@ def get_data_at_channel( path to `dsp` tier file. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] outsize = len(idx_ch) if not is_evaluated: res = np.full(outsize, defv, dtype=type(defv)) elif "tcm.array_id" == expr: - res = np.full(outsize, int(ch[2:]), dtype=int) + res = np.full( + outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int + ) elif "tcm.index" == expr: - res = np.where(ids == int(ch[2:]))[0] + res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] else: var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) @@ -459,6 +491,7 @@ def evaluate_to_first_or_last( var_ph: dict = None, defv: bool | int | float = np.nan, is_first: bool = True, + tcm_id_table_pattern: str = "ch{}", ) -> Array: """Aggregates across channels by returning the expression of the channel with value of `sorter`. @@ -493,6 +526,9 @@ def evaluate_to_first_or_last( default value. is_first defines if sorted by smallest or largest value of `sorter` + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # define dimension of output array @@ -503,7 +539,7 @@ def evaluate_to_first_or_last( for ch in chns: # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] # evaluate at channel res = get_data_at_channel( @@ -517,6 +553,7 @@ def evaluate_to_first_or_last( f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # get mask from query @@ -560,6 +597,7 @@ def evaluate_to_scalar( nrows: int, var_ph: dict = None, defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> Array: """Aggregates by summation across channels. @@ -591,6 +629,9 @@ def evaluate_to_scalar( dictionary of evt and additional parameters and their values. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. 
Must have one + placeholder which is the tcm id. """ # define dimension of output array @@ -598,7 +639,7 @@ def evaluate_to_scalar( for ch in chns: # get index list for this channel to be loaded - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( ch, @@ -611,6 +652,7 @@ def evaluate_to_scalar( f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # get mask from query @@ -644,6 +686,7 @@ def evaluate_at_channel( ch_comp: Array, var_ph: dict = None, defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> Array: """Aggregates by evaluating the expression at a given channel. @@ -669,26 +712,30 @@ def evaluate_at_channel( dictionary of `evt` and additional parameters and their values. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) for ch in np.unique(ch_comp.nda.astype(int)): # skip default value - if f"ch{ch}" not in lh5.ls(f_hit): + if get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): continue idx_ch = idx[ids == ch] res = get_data_at_channel( - f"ch{ch}", + get_table_name_by_pattern(tcm_id_table_pattern, ch), ids, idx, expr, exprl, var_ph, - f"ch{ch}" not in chns_rm, + get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, f_hit, f_dsp, defv, + tcm_id_table_pattern, ) out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) @@ -707,6 +754,7 @@ def evaluate_at_channel_vov( chns_rm: list, var_ph: dict = None, defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> VectorOfVectors: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. @@ -733,6 +781,9 @@ def evaluate_at_channel_vov( dictionary of `evt` and additional parameters and their values. defv default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ # blow up vov to aoesa @@ -745,16 +796,17 @@ def evaluate_at_channel_vov( for ch in chns: idx_ch = idx[ids == ch] res = get_data_at_channel( - f"ch{ch}", + get_table_name_by_pattern(tcm_id_table_pattern, ch), ids, idx, expr, exprl, var_ph, - f"ch{ch}" not in chns_rm, + get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # see in which events the current channel is present @@ -786,6 +838,7 @@ def evaluate_to_aoesa( var_ph: dict = None, defv: bool | int | float = np.nan, missv=np.nan, + tcm_id_table_pattern: str = "ch{}", ) -> ArrayOfEqualSizedArrays: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated expressions of channels that fulfill a query expression. @@ -822,13 +875,16 @@ def evaluate_to_aoesa( missing value. sorter sorts the entries in the vector according to sorter expression. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. 
""" # define dimension of output array out = np.full((nrows, len(chns)), missv) i = 0 for ch in chns: - idx_ch = idx[ids == int(ch[2:])] + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( ch, ids, @@ -840,6 +896,7 @@ def evaluate_to_aoesa( f_hit, f_dsp, defv, + tcm_id_table_pattern, ) # get mask from query @@ -866,6 +923,7 @@ def evaluate_to_vector( var_ph: dict = None, defv: bool | int | float = np.nan, sorter: str = None, + tcm_id_table_pattern: str = "ch{}", ) -> VectorOfVectors: """Aggregates by returning a :class:`.VectorOfVector` of evaluated expressions of channels that fulfill a query expression. @@ -902,6 +960,9 @@ def evaluate_to_vector( sorts the entries in the vector according to sorter expression. ``ascend_by:`` results in an vector ordered ascending, ``decend_by:`` sorts descending. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ out = evaluate_to_aoesa( idx, @@ -917,6 +978,7 @@ def evaluate_to_vector( var_ph, defv, np.nan, + tcm_id_table_pattern, ).view_as("np") # if a sorter is given sort accordingly @@ -933,6 +995,7 @@ def evaluate_to_vector( [tuple(fld.split("."))], None, nrows, + tcm_id_table_pattern=tcm_id_table_pattern, ).view_as("np") if "ascend_by" == md: out = out[np.arange(len(out))[:, None], np.argsort(s_val)] @@ -958,6 +1021,7 @@ def build_evt( wo_mode: str = "write_safe", group: str = "/evt/", tcm_group: str = "/hardware_tcm_1/", + tcm_id_table_pattern: str = "ch{}", ) -> None: """Transform data from the `hit` and `dsp` levels which a channel sorted to a event sorted data format. @@ -1040,6 +1104,9 @@ def build_evt( LH5 root group name. tcm_group LH5 root group in tcm file. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. """ store = LH5Store() tbl_cfg = evt_config @@ -1054,6 +1121,28 @@ def build_evt( if "operations" not in tbl_cfg.keys(): raise ValueError("operations field needs to be specified in the config") + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern) + if len(pattern_check) != 1: + raise ValueError( + f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid." + ) + elif "{" in pattern_check[0] or "}" in pattern_check[0]: + raise ValueError( + f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder." + ) + + if ( + get_table_name_by_pattern( + tcm_id_table_pattern, + get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]), + ) + != lh5.ls(f_hit)[0] + ): + raise ValueError( + f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!" 
+        )
+
     # create channel list according to config
     # This can be either read from the meta data
     # or a list of channel names
diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py
index 2a7269e9d..ae7570f9f 100644
--- a/tests/evt/test_build_evt.py
+++ b/tests/evt/test_build_evt.py
@@ -274,3 +274,27 @@ def test_vector_sort(lgnd_test_data, tmptestdir):
     vov_t0, _ = store.read("/evt/t0_decend", outfile)
     nda_t0 = vov_t0.to_aoesa().view_as("np")
     assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all()
+
+
+def test_tcm_id_table_pattern(lgnd_test_data, tmptestdir):
+    outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+    tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+    if os.path.exists(outfile):
+        os.remove(outfile)
+    f_tcm = lgnd_test_data.get_path(tcm_path)
+    f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp"))
+    f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit"))
+    f_config = f"{config_dir}/basic-evt-config.json"
+
+    with pytest.raises(ValueError):
+        build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{{}}")
+    with pytest.raises(ValueError):
+        build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{}{}")
+    with pytest.raises(NotImplementedError):
+        build_evt(
+            f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{tcm_id}"
+        )
+    with pytest.raises(ValueError):
+        build_evt(
+            f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="apple{}banana"
+        )

From 513871190526ff65b6efa83c2425b861b2c1d028 Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Wed, 24 Jan 2024 01:55:22 +0100
Subject: [PATCH 157/191] handle divide by 0 warnings correctly

---
 src/pygama/evt/modules/spm.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py
index c0df03470..90033209e 100644
--- a/src/pygama/evt/modules/spm.py
+++ b/src/pygama/evt/modules/spm.py
@@ -296,11 +296,17 @@ def get_etc(
     mask_total = time_all > tge
     mask_singlet = (time_all > tge) & (time_all < tge + swin)

-    pe_singlet = ak.nansum(pe_all[mask_singlet], axis=-1)
-    pe_total = ak.nansum(pe_all[mask_total], axis=-1)
-    etc = ak.where(pe_total > 0, pe_singlet / pe_total, np.nan)
+    pe_singlet = ak.to_numpy(
+        ak.fill_none(ak.nansum(pe_all[mask_singlet], axis=-1), 0), allow_missing=False
+    )
+    pe_total = ak.to_numpy(
+        ak.fill_none(ak.nansum(pe_all[mask_total], axis=-1), 0), allow_missing=False
+    )
+    etc = np.divide(
+        pe_singlet, pe_total, out=np.full_like(pe_total, np.nan), where=pe_total != 0
+    )

-    return Array(nda=ak.to_numpy(ak.fill_none(etc, np.nan), allow_missing=False))
+    return Array(nda=etc)


 # returns relative time shift of the first LAr pulse relative to the Ge trigger

From 8fcd1fc2d722e1f71d6ceab211de318fe4dbd8a8 Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Wed, 24 Jan 2024 14:22:19 +0100
Subject: [PATCH 158/191] become agnostic to group naming

---
 src/pygama/evt/build_evt.py   | 591 +++++++++++++++++++++++-----------
 src/pygama/evt/modules/spm.py | 207 +++++++++---
 tests/evt/test_build_evt.py   |  23 +-
 tests/skm/test_build_skm.py   |   8 +-
 4 files changed, 594 insertions(+), 235 deletions(-)

diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py
index 039cc756f..2cb54b2fe 100644
--- a/src/pygama/evt/build_evt.py
+++ b/src/pygama/evt/build_evt.py
@@ -69,6 +69,10 @@ def evaluate_expression(
     defv: bool | int | float = np.nan,
     sorter: str = None,
tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", + tcm_group: str = "tcm", ) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: """Evaluates the expression defined by the user across all channels according to the mode. @@ -128,12 +132,22 @@ def evaluate_expression( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + evt group + LH5 root group name of evt tier. + tcm_group + LH5 root group in tcm file. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. """ store = LH5Store() # find parameters in evt file or in parameters - exprl = re.findall(r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", expr) + exprl = re.findall( + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr + ) var_ph = {} if table: var_ph = var_ph | { @@ -148,11 +162,19 @@ def evaluate_expression( # evaluate expression func, params = expr.split("(") params = ( - params.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", "") + params.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", "") ) - params = [f_hit, f_dsp, f_tcm, [x for x in chns if x not in chns_rm]] + [ - num_and_pars(e, var_ph) for e in params[:-1].split(",") - ] + params = [ + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + [x for x in chns if x not in chns_rm], + ] + [num_and_pars(e, var_ph) for e in params[:-1].split(",")] # load function dynamically p, m = func.rsplit(".", 1) @@ -163,23 +185,27 @@ def evaluate_expression( # check if query is either on channel basis or evt basis (and not a mix) qry_mask = qry if qry is not None: - if "evt." in qry and ("hit." in qry or "dsp." in qry): - raise ValueError("Query can't be a mix of evt tier and lower tiers.") + if f"{evt_group}." in qry and ( + f"{hit_group}." in qry or f"{dsp_group}." in qry + ): + raise ValueError( + f"Query can't be a mix of {evt_group} tier and lower tiers." + ) # if it is an evt query we can evaluate it directly here - if table and "evt." in qry: - qry_mask = eval(qry.replace("evt.", ""), table) + if table and f"{evt_group}." 
in qry: + qry_mask = eval(qry.replace(f"{evt_group}.", ""), table) # load TCM data to define an event - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") # switch through modes if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): if "keep_at_ch:" == mode[:11]: - ch_comp = table[mode[11:].replace("evt.", "")] + ch_comp = table[mode[11:].replace(f"{evt_group}.", "")] else: - ch_comp = table[mode[12:].replace("evt.", "")] + ch_comp = table[mode[12:].replace(f"{evt_group}.", "")] if isinstance(ch_comp, Array): ch_comp = Array(nda=ids[ch_comp.view_as("np")]) elif isinstance(ch_comp, VectorOfVectors): @@ -197,31 +223,37 @@ def evaluate_expression( if isinstance(ch_comp, Array): return evaluate_at_channel( - idx, - ids, - f_hit, - f_dsp, - chns_rm, - expr, - exprl, - ch_comp, - var_ph, - defv, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) elif isinstance(ch_comp, VectorOfVectors): return evaluate_at_channel_vov( - idx, - ids, - f_hit, - f_dsp, - expr, - exprl, - ch_comp, - chns_rm, - var_ph, - defv, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + chns_rm=chns_rm, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) else: raise NotImplementedError( @@ -231,59 +263,69 @@ def evaluate_expression( elif "first_at:" in mode or "last_at:" in mode: sorter = tuple( re.findall( - r"(evt|hit|dsp).([a-zA-Z_$][\w$]*)", mode.split("first_at:")[-1] + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", + mode.split("first_at:")[-1], )[0] ) return evaluate_to_first_or_last( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - sorter, - var_ph, - defv, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + sorter=sorter, + var_ph=var_ph, + defv=defv, is_first=True if "first_at:" in mode else False, tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) elif mode in ["sum", "any", "all"]: return evaluate_to_scalar( - mode, - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - tcm_id_table_pattern, + mode=mode, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) elif "gather" == mode: return evaluate_to_vector( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry_mask, - nrows, - var_ph, - defv, - sorter, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + sorter=sorter, + tcm_id_table_pattern=tcm_id_table_pattern, + 
evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) else: raise ValueError(mode + " not a valid mode") @@ -295,6 +337,8 @@ def find_parameters( ch: str, idx_ch: NDArray, exprl: list, + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> dict: """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` tiers. @@ -311,24 +355,32 @@ def find_parameters( index array of entries to be read from files. exprl list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. """ # find fields in either dsp, hit - dsp_flds = [e[1] for e in exprl if e[0] == "dsp"] - hit_flds = [e[1] for e in exprl if e[0] == "hit"] + dsp_flds = [e[1] for e in exprl if e[0] == dsp_group] + hit_flds = [e[1] for e in exprl if e[0] == hit_group] store = LH5Store() hit_dict, dsp_dict = {}, {} if len(hit_flds) > 0: hit_ak = store.read( - f"{ch.replace('/','')}/hit/", f_hit, field_mask=hit_flds, idx=idx_ch + f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch )[0].view_as("ak") - hit_dict = dict(zip(["hit_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak))) + hit_dict = dict( + zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak)) + ) if len(dsp_flds) > 0: dsp_ak = store.read( - f"{ch.replace('/','')}/dsp/", f_dsp, field_mask=dsp_flds, idx=idx_ch + f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch )[0].view_as("ak") - dsp_dict = dict(zip(["dsp_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak))) + dsp_dict = dict( + zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak)) + ) return hit_dict | dsp_dict @@ -345,6 +397,9 @@ def get_data_at_channel( f_dsp: str, defv, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> np.ndarray: """Evaluates an expression and returns the result. @@ -374,6 +429,12 @@ def get_data_at_channel( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. """ # get index list for this channel to be loaded @@ -389,7 +450,15 @@ def get_data_at_channel( elif "tcm.index" == expr: res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] else: - var = find_parameters(f_hit, f_dsp, ch, idx_ch, exprl) + var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=exprl, + hit_group=hit_group, + dsp_group=dsp_group, + ) if var_ph is not None: var = var | var_ph @@ -397,7 +466,9 @@ def get_data_at_channel( # evaluate expression # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) res = eval( - expr.replace("dsp.", "dsp_").replace("hit.", "hit_").replace("evt.", ""), + expr.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", ""), var, ) @@ -426,6 +497,8 @@ def get_mask_from_query( idx_ch: NDArray, f_hit: str, f_dsp: str, + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> np.ndarray: """Evaluates a query expression and returns a mask accordingly. @@ -443,13 +516,30 @@ def get_mask_from_query( path to `hit` tier file. f_dsp path to `dsp` tier file. + hit_group + LH5 root group in hit file. + dsp_group + LH5 root group in dsp file. 
""" # get sub evt based query condition if needed if isinstance(qry, str): qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters(f_hit, f_dsp, ch, idx_ch, qry_lst) - limarr = eval(qry.replace("dsp.", "dsp_").replace("hit.", "hit_"), qry_var) + qry_var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=qry_lst, + hit_group=hit_group, + dsp_group=dsp_group, + ) + limarr = eval( + qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace( + f"{hit_group}.", f"{hit_group}_" + ), + qry_var, + ) # in case the expression evaluates to a single value blow it up if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): @@ -492,6 +582,9 @@ def evaluate_to_first_or_last( defv: bool | int | float = np.nan, is_first: bool = True, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> Array: """Aggregates across channels by returning the expression of the channel with value of `sorter`. @@ -529,6 +622,12 @@ def evaluate_to_first_or_last( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. """ # define dimension of output array @@ -543,26 +642,38 @@ def evaluate_to_first_or_last( # evaluate at channel res = get_data_at_channel( - ch, - ids, - idx, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) # find if sorter is in hit or dsp t0 = store.read( f"{ch}/{sorter[0]}/{sorter[1]}", - f_hit if "hit" == sorter[0] else f_dsp, + f_hit if f"{hit_group}" == sorter[0] else f_dsp, idx=idx_ch, )[0].view_as("np") @@ -598,6 +709,9 @@ def evaluate_to_scalar( var_ph: dict = None, defv: bool | int | float = np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> Array: """Aggregates by summation across channels. @@ -632,6 +746,12 @@ def evaluate_to_scalar( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # define dimension of output array @@ -642,21 +762,33 @@ def evaluate_to_scalar( idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( - ch, - ids, - idx, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) # switch through modes if "sum" == mode: @@ -687,6 +819,9 @@ def evaluate_at_channel( var_ph: dict = None, defv: bool | int | float = np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> Array: """Aggregates by evaluating the expression at a given channel. @@ -715,6 +850,12 @@ def evaluate_at_channel( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) @@ -725,17 +866,21 @@ def evaluate_at_channel( continue idx_ch = idx[ids == ch] res = get_data_at_channel( - get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids, - idx, - expr, - exprl, - var_ph, - get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) @@ -755,6 +900,9 @@ def evaluate_at_channel_vov( var_ph: dict = None, defv: bool | int | float = np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> VectorOfVectors: """Same as :func:`evaluate_at_channel` but evaluates expression at non flat channels :class:`.VectorOfVectors`. @@ -784,10 +932,16 @@ def evaluate_at_channel_vov( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" # blow up vov to aoesa - out = ak.Array([[] for x in range(len(ch_comp))]) + out = ak.Array([[] for _ in range(len(ch_comp))]) chns = np.unique(ch_comp.flattened_data.nda).astype(int) ch_comp = ch_comp.view_as("ak") @@ -796,17 +950,21 @@ def evaluate_at_channel_vov( for ch in chns: idx_ch = idx[ids == ch] res = get_data_at_channel( - get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids, - idx, - expr, - exprl, - var_ph, - get_table_name_by_pattern(tcm_id_table_pattern, ch) not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # see in which events the current channel is present @@ -839,6 +997,9 @@ def evaluate_to_aoesa( defv: bool | int | float = np.nan, missv=np.nan, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> ArrayOfEqualSizedArrays: """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated expressions of channels that fulfill a query expression. @@ -878,6 +1039,12 @@ def evaluate_to_aoesa( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. """ # define dimension of output array out = np.full((nrows, len(chns)), missv) @@ -886,21 +1053,33 @@ def evaluate_to_aoesa( for ch in chns: idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] res = get_data_at_channel( - ch, - ids, - idx, - expr, - exprl, - var_ph, - ch not in chns_rm, - f_hit, - f_dsp, - defv, - tcm_id_table_pattern, + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) # get mask from query - limarr = get_mask_from_query(qry, len(res), ch, idx_ch, f_hit, f_dsp) + limarr = get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) @@ -924,6 +1103,9 @@ def evaluate_to_vector( defv: bool | int | float = np.nan, sorter: str = None, tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", ) -> VectorOfVectors: """Aggregates by returning a :class:`.VectorOfVector` of evaluated expressions of channels that fulfill a query expression. @@ -963,39 +1145,52 @@ def evaluate_to_vector( tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
""" out = evaluate_to_aoesa( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - expr, - exprl, - qry, - nrows, - var_ph, - defv, - np.nan, - tcm_id_table_pattern, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry, + nrows=nrows, + var_ph=var_ph, + defv=defv, + missv=np.nan, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ).view_as("np") # if a sorter is given sort accordingly if sorter is not None: md, fld = sorter.split(":") s_val = evaluate_to_aoesa( - idx, - ids, - f_hit, - f_dsp, - chns, - chns_rm, - fld, - [tuple(fld.split("."))], - None, - nrows, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=fld, + exprl=[tuple(fld.split("."))], + qry=None, + nrows=nrows, + missv=np.nan, tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ).view_as("np") if "ascend_by" == md: out = out[np.arange(len(out))[:, None], np.argsort(s_val)] @@ -1019,8 +1214,10 @@ def build_evt( f_evt: str, evt_config: str | dict, wo_mode: str = "write_safe", - group: str = "/evt/", - tcm_group: str = "/hardware_tcm_1/", + evt_group: str = "evt", + tcm_group: str = "hardware_tcm_1", + dsp_group: str = "dsp", + hit_group: str = "hit", tcm_id_table_pattern: str = "ch{}", ) -> None: """Transform data from the `hit` and `dsp` levels which a channel sorted to a @@ -1100,14 +1297,19 @@ def build_evt( wo_mode writing mode. - group - LH5 root group name. + evt group + LH5 root group name of evt tier. tcm_group LH5 root group in tcm file. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. 
""" + store = LH5Store() tbl_cfg = evt_config if not isinstance(tbl_cfg, (str, dict)): @@ -1179,7 +1381,7 @@ def build_evt( elif isinstance(v, list): chns[k] = [e for e in v] - nrows = store.read_n_rows(f"{tcm_group}/cumulative_length", f_tcm) + nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm) table = Table(size=nrows) @@ -1191,7 +1393,7 @@ def build_evt( var = {} if "parameters" in v.keys(): var = var | v["parameters"] - res = table.eval(v["expression"].replace("evt.", ""), var) + res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var) # add attribute if present if "lgdo_attrs" in v.keys(): @@ -1235,19 +1437,24 @@ def build_evt( srter = v["sort"] obj = evaluate_expression( - f_tcm, - f_hit, - f_dsp, - chns_e, - chns_rm, - v["aggregation_mode"], - v["expression"], - nrows, - table, - pars, - qry, - defaultv, - srter, + f_tcm=f_tcm, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns_e, + chns_rm=chns_rm, + mode=v["aggregation_mode"], + expr=v["expression"], + nrows=nrows, + table=table, + para=pars, + qry=qry, + defv=defaultv, + sorter=srter, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + tcm_group=tcm_group, ) # add attribute if present @@ -1264,7 +1471,9 @@ def build_evt( clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] for fld in clms_to_remove: table.remove_field(fld, True) - store.write(obj=table, name=group, lh5_file=f_evt, wo_mode=wo_mode) + store.write( + obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode + ) else: log.warning("No output fields specified, no file will be written.") diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 90033209e..b72198a6f 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -5,6 +5,9 @@ - path to the hit file - path to the dsp file - path to the tcm file +- hit LH5 root group +- dsp LH5 root group +- tcm LH5 root group - list of channels processed additional parameters are free to the user and need to be defined in the JSON """ @@ -70,12 +73,24 @@ def get_spm_mask( # mode 2 -> return rawids # mode 3 -> return tcm_idx def get_masked_tcm_idx( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode=0 + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + mode=0, ) -> VectorOfVectors: # load TCM data to define an event store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") arr_lst = [] @@ -87,13 +102,17 @@ def get_masked_tcm_idx( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "np" + ) tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) tmp[idx_ch] = pe pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) tmp[idx_ch] = times times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) @@ -127,14 +146,16 @@ def get_masked_tcm_idx( return 
VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1)) -def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode): +def get_spm_ene_or_maj( + f_hit, f_tcm, hit_group, tcm_group, chs, lim, trgr, tdefault, tmin, tmax, mode +): if mode not in ["energy_hc", "energy_dplms", "majority_hc", "majority_dplms"]: raise ValueError("Unknown mode") # load TCM data to define an event store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") out = np.zeros(np.max(idx) + 1) if isinstance(trgr, (float, int)): @@ -148,36 +169,36 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) if mode in ["energy_dplms", "majority_dplms"]: pe = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe_dplms", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") + store.read( + f"{ch}/{hit_group}/energy_in_pe_dplms", f_hit, idx=idx_ch + )[0].view_as("ak") ) ) # times are in sample units times = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos_dplms", f_hit, idx=idx_ch)[ - 0 - ].view_as("ak") + store.read( + f"{ch}/{hit_group}/trigger_pos_dplms", f_hit, idx=idx_ch + )[0].view_as("ak") ) ) else: pe = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( - "ak" - ) + store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") ) ) # times are in sample units times = ak.drop_none( ak.nan_to_none( - store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as( - "ak" - ) + store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("ak") ) ) @@ -196,34 +217,122 @@ def get_spm_ene_or_maj(f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, mode) # get LAr energy per event over all channels -def get_energy(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_energy( + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, +) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_hc" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "energy_hc", ) # get LAr majority per event over all channels -def get_majority(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_majority( + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, +) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_hc" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "majority_hc", ) # get LAr energy per event over all channels def get_energy_dplms( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, ) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "energy_dplms" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "energy_dplms", ) # get LAr majority per event over all channels def get_majority_dplms( - f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax + f_hit, + f_dsp, + f_tcm, + hit_group, + 
dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, ) -> Array: return get_spm_ene_or_maj( - f_hit, f_tcm, chs, lim, trgr, tdefault, tmin, tmax, "majority_dplms" + f_hit, + f_tcm, + hit_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, + "majority_dplms", ) @@ -236,6 +345,9 @@ def get_etc( f_hit, f_dsp, f_tcm, + hit_group, + dsp_group, + tcm_group, chs, lim, trgr, @@ -248,8 +360,8 @@ def get_etc( ) -> Array: # load TCM data to define an event store = LH5Store() - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") pe_lst = [] time_lst = [] @@ -261,13 +373,17 @@ def get_etc( for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "np" + ) tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) tmp[idx_ch] = pe pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) tmp[idx_ch] = times times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) @@ -310,11 +426,24 @@ def get_etc( # returns relative time shift of the first LAr pulse relative to the Ge trigger -def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> Array: +def get_time_shift( + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + chs, + lim, + trgr, + tdefault, + tmin, + tmax, +) -> Array: store = LH5Store() # load TCM data to define an event - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("np") - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("np") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") time_all = ak.Array([[] for x in range(np.max(idx) + 1)]) if isinstance(trgr, (float, int)): @@ -325,13 +454,17 @@ def get_time_shift(f_hit, f_dsp, f_tcm, chs, lim, trgr, tdefault, tmin, tmax) -> for ch in chs: idx_ch = idx[ids == int(ch[2:])] - pe = store.read(f"{ch}/hit/energy_in_pe", f_hit, idx=idx_ch)[0].view_as("np") + pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as( + "np" + ) tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan) tmp[idx_ch] = pe pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) # times are in sample units - times = store.read(f"{ch}/hit/trigger_pos", f_hit, idx=idx_ch)[0].view_as("np") + times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[ + 0 + ].view_as("np") tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan) tmp[idx_ch] = times times = ak.drop_none(ak.nan_to_none(ak.Array(tmp))) diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index ae7570f9f..0f193074c 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -25,7 +25,9 @@ def test_basics(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/basic-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) assert "statement" in store.read("/evt/multiplicity", 
outfile)[0].getattrs().keys() @@ -75,7 +77,10 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/module-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", + tcm_group="hardware_tcm_1", ) assert os.path.exists(outfile) @@ -101,7 +106,10 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", + tcm_group="hardware_tcm_1", ) assert os.path.exists(outfile) @@ -131,7 +139,10 @@ def test_vov(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/vov-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", + tcm_group="hardware_tcm_1", ) assert os.path.exists(outfile) @@ -221,7 +232,9 @@ def test_query(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{config_dir}/query-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) assert len(lh5.ls(outfile, "/evt/")) == 12 diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index 6957e3333..b23137ec6 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -25,7 +25,9 @@ def test_basics(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{evt_config_dir}/vov-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) @@ -86,7 +88,9 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): f_evt=outfile, evt_config=f"{evt_config_dir}/vov-test-evt-config.json", wo_mode="o", - group="/evt/", + evt_group="evt", + hit_group="hit", + dsp_group="dsp", tcm_group="hardware_tcm_1", ) From eaa6cb4347f258e8fc28c727ec29b0eb64be0cff Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 24 Jan 2024 15:19:08 +0100 Subject: [PATCH 159/191] split build_evt into sub modules --- src/pygama/evt/aggregators.py | 653 ++++++++++++ src/pygama/evt/build_evt.py | 1815 ++++++++------------------------- src/pygama/evt/modules/spm.py | 47 +- src/pygama/evt/utils.py | 278 +++++ 4 files changed, 1422 insertions(+), 1371 deletions(-) create mode 100644 src/pygama/evt/aggregators.py create mode 100644 src/pygama/evt/utils.py diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py new file mode 100644 index 000000000..f9131ed96 --- /dev/null +++ b/src/pygama/evt/aggregators.py @@ -0,0 +1,653 @@ +""" +This module provides aggregators to build the `evt` tier. +""" + +from __future__ import annotations +import re +import numpy as np +from numpy.typing import NDArray +import awkward as ak +from lgdo.lh5 import LH5Store +from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 + +from . import utils + +def evaluate_to_first_or_last( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + sorter: tuple, + var_ph: dict = None, + defv: bool | int | float = np.nan, + is_first: bool = True, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> Array: + """Aggregates across channels by returning the expression of the channel + with value of `sorter`. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. 
+ f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + qry + query expression to mask aggregation. + nrows + length of output array. + sorter + tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + is_first + defines if sorted by smallest or largest value of `sorter` + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. + """ + + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + outt = np.zeros(len(out)) + + store = LH5Store() + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + + # evaluate at channel + res = utils.get_data_at_channel( + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # get mask from query + limarr = utils.get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # find if sorter is in hit or dsp + t0 = store.read( + f"{ch}/{sorter[0]}/{sorter[1]}", + f_hit if f"{hit_group}" == sorter[0] else f_dsp, + idx=idx_ch, + )[0].view_as("np") + + if t0.ndim > 1: + raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array") + + if is_first: + if ch == chns[0]: + outt[:] = np.inf + + out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) + + else: + out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) + outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + + return Array(nda=out) + + +def evaluate_to_scalar( + mode: str, + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + var_ph: dict = None, + defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> Array: + """Aggregates by summation across channels. + + Parameters + ---------- + mode + aggregation mode. + idx + tcm index array. + ids + tcm id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of dsp/hit/evt parameter tuples in expression (tier, field). + qry + query expression to mask aggregation. + nrows + length of output array + var_ph + dictionary of evt and additional parameters and their values. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. 
+ dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. + """ + + # define dimension of output array + out = np.full(nrows, defv, dtype=type(defv)) + + for ch in chns: + # get index list for this channel to be loaded + idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + + res = utils.get_data_at_channel( + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # get mask from query + limarr = utils.get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # switch through modes + if "sum" == mode: + if res.dtype == bool: + res = res.astype(int) + out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + if "any" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] | (res & limarr) + if "all" == mode: + if res.dtype != bool: + res = res.astype(bool) + out[idx_ch] = out[idx_ch] & res & limarr + + return Array(nda=out) + + +def evaluate_at_channel( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns_rm: list, + expr: str, + exprl: list, + ch_comp: Array, + var_ph: dict = None, + defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> Array: + """Aggregates by evaluating the expression at a given channel. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + ch_comp + array of rawids at which the expression is evaluated. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
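
    Examples
    --------
    The core selection step is a masked overwrite: for every unique rawid
    found in ``ch_comp``, the event rows that point at that channel take that
    channel's result. A toy sketch of just this step (rawids and values are
    made up for illustration):

    .. code-block:: python

        import numpy as np

        ch_comp = np.array([1084803, 1084804, 1084803])  # rawid per event
        out = np.full(len(ch_comp), np.nan)
        for ch, res in {1084803: 10.0, 1084804: 20.0}.items():
            # overwrite only the rows that point at this channel
            out = np.where(ch_comp == ch, res, out)
        # out -> array([10., 20., 10.])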
+ """ + + out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) + + for ch in np.unique(ch_comp.nda.astype(int)): + # skip default value + if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): + continue + idx_ch = idx[ids == ch] + res = utils.get_data_at_channel( + ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch), + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) + not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) + + return Array(nda=out) + + +def evaluate_at_channel_vov( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + expr: str, + exprl: list, + ch_comp: VectorOfVectors, + chns_rm: list, + var_ph: dict = None, + defv: bool | int | float = np.nan, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> VectorOfVectors: + """Same as :func:`evaluate_at_channel` but evaluates expression at non + flat channels :class:`.VectorOfVectors`. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + ch_comp + array of "rawid"s at which the expression is evaluated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
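
    Examples
    --------
    Per-channel columns are padded to the event axis and concatenated along
    ``axis=-1``. A toy version of one such concatenation step (values are
    made up for illustration):

    .. code-block:: python

        import awkward as ak

        out = ak.Array([[], [], []])          # one empty list per event
        col = ak.Array([[10.0], [], [12.0]])  # one channel's padded column
        out = ak.concatenate((out, col), axis=-1)
        # out -> [[10.0], [], [12.0]]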
+ """

    # blow up vov to aoesa
    out = ak.Array([[] for _ in range(len(ch_comp))])

    chns = np.unique(ch_comp.flattened_data.nda).astype(int)
    ch_comp = ch_comp.view_as("ak")

    type_name = None
    for ch in chns:
        idx_ch = idx[ids == ch]
        res = utils.get_data_at_channel(
            ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch),
            ids=ids,
            idx=idx,
            expr=expr,
            exprl=exprl,
            var_ph=var_ph,
            is_evaluated=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch)
            not in chns_rm,
            f_hit=f_hit,
            f_dsp=f_dsp,
            defv=defv,
            tcm_id_table_pattern=tcm_id_table_pattern,
            evt_group=evt_group,
            hit_group=hit_group,
            dsp_group=dsp_group,
        )

        # see in which events the current channel is present
        mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False)
        cv = np.full(len(ch_comp), np.nan)
        cv[idx_ch] = res
        cv[~mask] = np.nan
        cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None]))

        out = ak.concatenate((out, cv), axis=-1)

        if ch == chns[0]:
            type_name = res.dtype

    return VectorOfVectors(ak.values_astype(out, type_name))


def evaluate_to_aoesa(
    idx: NDArray,
    ids: NDArray,
    f_hit: str,
    f_dsp: str,
    chns: list,
    chns_rm: list,
    expr: str,
    exprl: list,
    qry: str | NDArray,
    nrows: int,
    var_ph: dict = None,
    defv: bool | int | float = np.nan,
    missv=np.nan,
    tcm_id_table_pattern: str = "ch{}",
    evt_group: str = "evt",
    hit_group: str = "hit",
    dsp_group: str = "dsp",
) -> ArrayOfEqualSizedArrays:
    """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated
    expressions of channels that fulfill a query expression.

    Parameters
    ----------
    idx
        `tcm` index array.
    ids
        `tcm` id array.
    f_hit
        path to `hit` tier file.
    f_dsp
        path to `dsp` tier file.
    chns
        list of channels to be aggregated.
    chns_rm
        list of channels to be skipped from evaluation and set to default value.
    expr
        expression string to be evaluated.
    exprl
        list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
    qry
        query expression to mask aggregation.
    nrows
        length of output :class:`.ArrayOfEqualSizedArrays`.
    var_ph
        dictionary of `evt` and additional parameters and their values.
    defv
        default value.
    missv
        missing value.
    tcm_id_table_pattern
        Pattern to format tcm id values to table name in higher tiers. Must have one
        placeholder which is the tcm id.
    dsp_group
        LH5 root group in dsp file.
    hit_group
        LH5 root group in hit file.
    evt_group
        LH5 root group in evt file.
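
    Examples
    --------
    The output is a rectangular (events x channels) block pre-filled with
    ``missv``; each channel fills its own column at the event rows where it
    fired. A toy sketch (values are made up for illustration):

    .. code-block:: python

        import numpy as np

        out = np.full((3, 2), np.nan)    # 3 events, 2 channels
        out[np.array([0, 2]), 0] = 10.0  # channel 0 fired in events 0 and 2
        out[np.array([1]), 1] = 20.0     # channel 1 fired in event 1
        # out -> [[10., nan], [nan, 20.], [10., nan]]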
+ """ + # define dimension of output array + out = np.full((nrows, len(chns)), missv) + + i = 0 + for ch in chns: + idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + res = utils.get_data_at_channel( + ch=ch, + ids=ids, + idx=idx, + expr=expr, + exprl=exprl, + var_ph=var_ph, + is_evaluated=ch not in chns_rm, + f_hit=f_hit, + f_dsp=f_dsp, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + # get mask from query + limarr = utils.get_mask_from_query( + qry=qry, + length=len(res), + ch=ch, + idx_ch=idx_ch, + f_hit=f_hit, + f_dsp=f_dsp, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) + + i += 1 + + return ArrayOfEqualSizedArrays(nda=out) + + +def evaluate_to_vector( + idx: NDArray, + ids: NDArray, + f_hit: str, + f_dsp: str, + chns: list, + chns_rm: list, + expr: str, + exprl: list, + qry: str | NDArray, + nrows: int, + var_ph: dict = None, + defv: bool | int | float = np.nan, + sorter: str = None, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> VectorOfVectors: + """Aggregates by returning a :class:`.VectorOfVector` of evaluated + expressions of channels that fulfill a query expression. + + Parameters + ---------- + idx + `tcm` index array. + ids + `tcm` id array. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + chns + list of channels to be aggregated. + chns_rm + list of channels to be skipped from evaluation and set to default value. + expr + expression string to be evaluated. + exprl + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. + qry + query expression to mask aggregation. + nrows + length of output :class:`.VectorOfVectors`. + ch_comp + array of "rawids" at which the expression is evaluated. + var_ph + dictionary of `evt` and additional parameters and their values. + defv + default value. + sorter + sorts the entries in the vector according to sorter expression. + ``ascend_by:`` results in an vector ordered ascending, + ``decend_by:`` sorts descending. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. 
+ """
    out = evaluate_to_aoesa(
        idx=idx,
        ids=ids,
        f_hit=f_hit,
        f_dsp=f_dsp,
        chns=chns,
        chns_rm=chns_rm,
        expr=expr,
        exprl=exprl,
        qry=qry,
        nrows=nrows,
        var_ph=var_ph,
        defv=defv,
        missv=np.nan,
        tcm_id_table_pattern=tcm_id_table_pattern,
        evt_group=evt_group,
        hit_group=hit_group,
        dsp_group=dsp_group,
    ).view_as("np")

    # if a sorter is given sort accordingly
    if sorter is not None:
        md, fld = sorter.split(":")
        s_val = evaluate_to_aoesa(
            idx=idx,
            ids=ids,
            f_hit=f_hit,
            f_dsp=f_dsp,
            chns=chns,
            chns_rm=chns_rm,
            expr=fld,
            exprl=[tuple(fld.split("."))],
            qry=None,
            nrows=nrows,
            missv=np.nan,
            tcm_id_table_pattern=tcm_id_table_pattern,
            evt_group=evt_group,
            hit_group=hit_group,
            dsp_group=dsp_group,
        ).view_as("np")
        if "ascend_by" == md:
            out = out[np.arange(len(out))[:, None], np.argsort(s_val)]

        elif "descend_by" == md:
            out = out[np.arange(len(out))[:, None], np.argsort(-s_val)]
        else:
            raise ValueError(
                "sorter values can only have 'ascend_by' or 'descend_by' prefixes"
            )

    return VectorOfVectors(
        ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv))
    )
diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py
index 2cb54b2fe..e0c0dafb3 100644
--- a/src/pygama/evt/build_evt.py
+++ b/src/pygama/evt/build_evt.py
@@ -14,124 +14,102 @@
 import numpy as np
 from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5
 from lgdo.lh5 import LH5Store
-from numpy.typing import NDArray
 
-log = logging.getLogger(__name__)
-
-
-def get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int:
-    pre = tcm_id_table_pattern.split("{")[0]
-    post = tcm_id_table_pattern.split("}")[1]
-    return int(ch.strip(pre).strip(post))
-
-
-def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str:
-    # check tcm_id_table_pattern validity
-    pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0]
-    if pattern_check == "" or ":" == pattern_check[0]:
-        return tcm_id_table_pattern.format(ch_id)
-    else:
-        raise NotImplementedError(
-            "Only empty placeholders with format specifications are currently implemented"
-        )
+from . import aggregators, utils
 
-
-def num_and_pars(value: str, par_dic: dict):
-    # function tries to convert a string to a int, float, bool
-    # or returns the value if value is a key in par_dic
-    if value in par_dic.keys():
-        return par_dic[value]
-    try:
-        value = int(value)
-    except ValueError:
-        try:
-            value = float(value)
-        except ValueError:
-            try:
-                value = bool(value)
-            except ValueError:
-                pass
-    return value
+log = logging.getLogger(__name__)
 
 
-def evaluate_expression(
+def build_evt(
     f_tcm: str,
-    f_hit: str,
     f_dsp: str,
-    chns: list,
-    chns_rm: list,
-    mode: str,
-    expr: str,
-    nrows: int,
-    table: Table = None,
-    para: dict = None,
-    qry: str = None,
-    defv: bool | int | float = np.nan,
-    sorter: str = None,
-    tcm_id_table_pattern: str = "ch{}",
+    f_hit: str,
+    f_evt: str,
+    evt_config: str | dict,
+    wo_mode: str = "write_safe",
     evt_group: str = "evt",
-    hit_group: str = "hit",
+    tcm_group: str = "hardware_tcm_1",
    dsp_group: str = "dsp",
-    tcm_group: str = "tcm",
-) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors:
-    """Evaluates the expression defined by the user across all channels
-    according to the mode.
+    hit_group: str = "hit",
+    tcm_id_table_pattern: str = "ch{}",
+) -> None:
+    """Transform data from the `hit` and `dsp` levels, which is channel sorted,
+    to an event sorted data format.
 
     Parameters
     ----------
     f_tcm
-        path to `tcm` tier file. 
- f_hit
-        path to `hit` tier file.
+        input LH5 file of the tcm level.
     f_dsp
-        path to `dsp` tier file.
-    chns
-        list of channel names across which expression gets evaluated (form:
-        ``ch``).
-    chns_rm
-        list of channels which get set to default value during evaluation. In
-        function mode they are removed entirely (form: ``ch``)
-    mode
-        The mode determines how the event entry is calculated across channels.
-        Options are:
+        input LH5 file of the dsp level.
+    f_hit
+        input LH5 file of the hit level.
+    f_evt
+        name of the output file.
+    evt_config
+        name of configuration file or dictionary defining event fields. Channel
+        lists can be defined by importing a metadata module.
 
-    - ``first_at:sorter``: aggregates across channels by returning the
-      expression of the channel with smallest value of sorter.
-    - ``last_at``: aggregates across channels by returning the expression of
-      the channel with largest value of sorter.
-    - ``sum``: aggregates by summation.
-    - ``any``: aggregates by logical or.
-    - ``all``: aggregates by logical and.
-    - ``keep_at_ch:ch_field``: aggregates according to passed ch_field.
-    - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm
-      index field.
-    - ``gather``: Channels are not combined, but result saved as
-      :class:`.VectorOfVectors`.
+        - ``operations`` defines the fields ``name=key``, where ``channels``
+          specifies the channels used for this field (either a string or a
+          list of strings),
+        - ``aggregation_mode`` defines how the channels should be combined (see
+          :func:`evaluate_expression`).
+        - ``expression`` defines the mathematical/special function to apply
+          (see :func:`evaluate_expression`),
+        - ``query`` defines an expression to mask the aggregation.
+        - ``parameters`` defines any other parameter used in expression.
 
-    qry
-        a query that can mask the aggregation.
-    expr
-        the expression. That can be any mathematical equation/comparison. If
-        `mode` is ``function``, the expression needs to be a special processing
-        function defined in modules (e.g. :func:`.modules.spm.get_energy`). In
-        the expression parameters from either hit, dsp, evt tier (from
-        operations performed before this one! Dictionary operations order
-        matters), or from the ``parameters`` field can be used.
-    nrows
-        number of rows to be processed.
-    table
-        table of 'evt' tier data.
-    para
-        dictionary of parameters defined in the ``parameters`` field in the
-        configuration dictionary.
-    defv
-        default value of evaluation.
-    sorter
-        can be used to sort vector outputs according to sorter expression (see
-        :func:`evaluate_to_vector`).
-    tcm_id_table_pattern
-        Pattern to format tcm id values to table name in higher tiers. Must have one
-        placeholder which is the tcm id.
+        For example:
+
+        .. code-block:: json
+
+        {
+          "channels": {
+            "geds_on": ["ch1084803", "ch1084804", "ch1121600"],
+            "spms_on": ["ch1057600", "ch1059201", "ch1062405"],
+            "muon": "ch1027202"
+          },
+          "operations": {
+            "energy_id":{
+              "channels": "geds_on",
+              "aggregation_mode": "gather",
+              "query": "hit.cuspEmax_ctc_cal > 25",
+              "expression": "tcm.array_id",
+              "sort": "ascend_by:dsp.tp_0_est"
+            },
+            "energy":{
+              "aggregation_mode": "keep_at_ch:evt.energy_id",
+              "expression": "hit.cuspEmax_ctc_cal > 25"
+            },
+            "is_muon_rejected":{
+              "channels": "muon",
+              "aggregation_mode": "any",
+              "expression": "dsp.wf_max>a",
+              "parameters": {"a":15100},
+              "initial": false
+            },
+            "multiplicity":{
+              "channels": ["geds_on", "geds_no_psd", "geds_ac"],
+              "aggregation_mode": "sum",
+              "expression": "hit.cuspEmax_ctc_cal > a",
+              "parameters": {"a":25},
+              "initial": 0
+            },
+            "t0":{
+              "aggregation_mode": "keep_at_ch:evt.energy_id",
+              "expression": "dsp.tp_0_est"
+            },
+            "lar_energy":{
+              "channels": "spms_on",
+              "aggregation_mode": "function",
+              "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)"
+            }
+          }
+        }
+
+    wo_mode
+        writing mode.
     evt_group
         LH5 root group name of evt tier.
     tcm_group
@@ -140,1344 +118,457 @@ def evaluate_expression(
         LH5 root group in dsp file.
     hit_group
         LH5 root group in hit file.
+    tcm_id_table_pattern
+        Pattern to format tcm id values to table name in higher tiers. Must have one
+        placeholder which is the tcm id.
     """
 
     store = LH5Store()
+    tbl_cfg = evt_config
+    if not isinstance(tbl_cfg, (str, dict)):
+        raise TypeError("evt_config must be a dict or a path to a JSON file")
+    if isinstance(tbl_cfg, str):
+        with open(tbl_cfg) as f:
+            tbl_cfg = json.load(f)
 
-    # find parameters in evt file or in parameters
-    exprl = re.findall(
-        rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr
-    )
-    var_ph = {}
-    if table:
-        var_ph = var_ph | {
-            e: table[e].view_as("ak")
-            for e in table.keys()
-            if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors))
-        }
-    if para:
-        var_ph = var_ph | para
+    if "channels" not in tbl_cfg.keys():
+        raise ValueError("channel field needs to be specified in the config")
+    if "operations" not in tbl_cfg.keys():
+        raise ValueError("operations field needs to be specified in the config")
 
-    if mode == "function":
-        # evaluate expression
-        func, params = expr.split("(")
-        params = (
-            params.replace(f"{dsp_group}.", f"{dsp_group}_")
-            .replace(f"{hit_group}.", f"{hit_group}_")
-            .replace(f"{evt_group}.", "")
+    # check tcm_id_table_pattern validity
+    pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)
+    if len(pattern_check) != 1:
+        raise ValueError(
+            f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid."
+        )
+    elif "{" in pattern_check[0] or "}" in pattern_check[0]:
+        raise ValueError(
+            f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder."
         )
-        params = [
-            f_hit,
-            f_dsp,
-            f_tcm,
-            hit_group,
-            dsp_group,
-            tcm_group,
-            [x for x in chns if x not in chns_rm],
-        ] + [num_and_pars(e, var_ph) for e in params[:-1].split(",")]
 
-        # load function dynamically
-        p, m = func.rsplit(".", 1)
-        met = getattr(import_module(p, package=__package__), m)
-        return met(*params)
+    if (
+        utils.get_table_name_by_pattern(
+            tcm_id_table_pattern,
+            utils.get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]),
+        )
+        != lh5.ls(f_hit)[0]
+    ):
+        raise ValueError(
+            f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!" 
+ )
 
+    # create channel list according to config
+    # This can either be read from the metadata
+    # or be a list of channel names
+    log.debug("Creating channel dictionary")
+
+    chns = {}
+
+    for k, v in tbl_cfg["channels"].items():
+        if isinstance(v, dict):
+            # it is a meta module; the "module" key must exist
+            if "module" not in v.keys():
+                raise ValueError(
+                    "Need the 'module' key to load channels via a metadata module"
+                )
+
+            attr = {}
+            # the time_key argument is set to the time key of the DSP file
+            # in case it is not provided by the config
+            if "time_key" not in v.keys():
+                attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)
+
+            # if "None" do None
+            elif "None" == v["time_key"]:
+                attr["time_key"] = None
+
+            # load module
+            p, m = v["module"].rsplit(".", 1)
+            met = getattr(import_module(p, package=__package__), m)
+            chns[k] = met(v | attr)
+
+        elif isinstance(v, str):
+            chns[k] = [v]
+
+        elif isinstance(v, list):
+            chns[k] = list(v)
+
+    nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm)
+
+    table = Table(size=nrows)
+
+    for k, v in tbl_cfg["operations"].items():
+        log.debug("Processing field " + k)
+
+        # if mode is not defined in the operation, it can only be an operation on the evt level. 
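+        # For example, a purely evt-level operation needs no
+        # "aggregation_mode"; a hypothetical config entry (illustrative only,
+        # not part of this patch) could read:
+        #
+        #   "energy_sum_is_high": {"expression": "evt.energy_sum > 200"}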
+ if "aggregation_mode" not in v.keys(): + var = {} + if "parameters" in v.keys(): + var = var | v["parameters"] + res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var) + + # add attribute if present + if "lgdo_attrs" in v.keys(): + res.attrs |= v["lgdo_attrs"] + + table.add_field(k, res) + + # Else we build the event entry + else: + if "channels" not in v.keys(): + chns_e = [] + elif isinstance(v["channels"], str): + chns_e = chns[v["channels"]] + elif isinstance(v["channels"], list): + chns_e = list( + itertools.chain.from_iterable([chns[e] for e in v["channels"]]) + ) + chns_rm = [] + if "exclude_channels" in v.keys(): + if isinstance(v["exclude_channels"], str): + chns_rm = chns[v["exclude_channels"]] + elif isinstance(v["exclude_channels"], list): + chns_rm = list( + itertools.chain.from_iterable( + [chns[e] for e in v["exclude_channels"]] ) ) - else: - raise NotImplementedError( - type(ch_comp) - + " not supported (only Array and VectorOfVectors are supported)" - ) - if isinstance(ch_comp, Array): - return evaluate_at_channel( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - ch_comp=ch_comp, - var_ph=var_ph, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - elif isinstance(ch_comp, VectorOfVectors): - return evaluate_at_channel_vov( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - expr=expr, - exprl=exprl, - ch_comp=ch_comp, - chns_rm=chns_rm, - var_ph=var_ph, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - else: - raise NotImplementedError( - type(ch_comp) - + " not supported (only Array and VectorOfVectors are supported)" - ) - elif "first_at:" in mode or "last_at:" in mode: - sorter = tuple( - re.findall( - rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", - mode.split("first_at:")[-1], - )[0] - ) - return evaluate_to_first_or_last( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry_mask, - nrows=nrows, - sorter=sorter, - var_ph=var_ph, - defv=defv, - is_first=True if "first_at:" in mode else False, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - elif mode in ["sum", "any", "all"]: - return evaluate_to_scalar( - mode=mode, - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry_mask, - nrows=nrows, - var_ph=var_ph, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - elif "gather" == mode: - return evaluate_to_vector( - idx=idx, - ids=ids, + pars, qry, defaultv, srter = None, None, np.nan, None + if "parameters" in v.keys(): + pars = v["parameters"] + if "query" in v.keys(): + qry = v["query"] + if "initial" in v.keys(): + defaultv = v["initial"] + if isinstance(defaultv, str) and ( + defaultv in ["np.nan", "np.inf", "-np.inf"] + ): + defaultv = eval(defaultv) + if "sort" in v.keys(): + srter = v["sort"] + + obj = evaluate_expression( + f_tcm=f_tcm, f_hit=f_hit, f_dsp=f_dsp, - chns=chns, + chns=chns_e, chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry_mask, + mode=v["aggregation_mode"], + expr=v["expression"], nrows=nrows, - var_ph=var_ph, - defv=defv, - sorter=sorter, + table=table, + para=pars, + qry=qry, + defv=defaultv, + sorter=srter, 
tcm_id_table_pattern=tcm_id_table_pattern, evt_group=evt_group, hit_group=hit_group, dsp_group=dsp_group, + tcm_group=tcm_group, ) - else: - raise ValueError(mode + " not a valid mode") - - -def find_parameters( - f_hit: str, - f_dsp: str, - ch: str, - idx_ch: NDArray, - exprl: list, - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> dict: - """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` - tiers. - Parameters - ---------- - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - ch - "rawid" in the tiers. - idx_ch - index array of entries to be read from files. - exprl - list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - """ + # add attribute if present + if "lgdo_attrs" in v.keys(): + obj.attrs |= v["lgdo_attrs"] - # find fields in either dsp, hit - dsp_flds = [e[1] for e in exprl if e[0] == dsp_group] - hit_flds = [e[1] for e in exprl if e[0] == hit_group] + table.add_field(k, obj) - store = LH5Store() - hit_dict, dsp_dict = {}, {} - if len(hit_flds) > 0: - hit_ak = store.read( - f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch - )[0].view_as("ak") - hit_dict = dict( - zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak)) - ) - if len(dsp_flds) > 0: - dsp_ak = store.read( - f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch - )[0].view_as("ak") - dsp_dict = dict( - zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak)) - ) + # write output fields into f_evt + if "outputs" in tbl_cfg.keys(): + if len(tbl_cfg["outputs"]) < 1: + log.warning("No output fields specified, no file will be written.") + else: + clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] + for fld in clms_to_remove: + table.remove_field(fld, True) + store.write( + obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode + ) + else: + log.warning("No output fields specified, no file will be written.") - return hit_dict | dsp_dict + key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0) + log.info( + f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" + ) -def get_data_at_channel( - ch: str, - ids: NDArray, - idx: NDArray, - expr: str, - exprl: list, - var_ph: dict, - is_evaluated: bool, +def evaluate_expression( + f_tcm: str, f_hit: str, f_dsp: str, - defv, + chns: list, + chns_rm: list, + mode: str, + expr: str, + nrows: int, + table: Table = None, + para: dict = None, + qry: str = None, + defv: bool | int | float = np.nan, + sorter: str = None, tcm_id_table_pattern: str = "ch{}", evt_group: str = "evt", hit_group: str = "hit", dsp_group: str = "dsp", -) -> np.ndarray: - """Evaluates an expression and returns the result. + tcm_group: str = "tcm", +) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors: + """Evaluates the expression defined by the user across all channels + according to the mode. Parameters ---------- - ch - "rawid" of channel to be evaluated. - idx - `tcm` index array. - ids - `tcm` id array. - expr - expression to be evaluated. - exprl - list of parameter-tuples ``(root_group, field)`` found in the expression. - var_ph - dict of additional parameters that are not channel dependent. - is_evaluated - if false, the expression does not get evaluated but an array of default - values is returned. + f_tcm + path to `tcm` tier file. 
f_hit path to `hit` tier file. f_dsp path to `dsp` tier file. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. - """ - - # get index list for this channel to be loaded - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] - outsize = len(idx_ch) - - if not is_evaluated: - res = np.full(outsize, defv, dtype=type(defv)) - elif "tcm.array_id" == expr: - res = np.full( - outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int - ) - elif "tcm.index" == expr: - res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] - else: - var = find_parameters( - f_hit=f_hit, - f_dsp=f_dsp, - ch=ch, - idx_ch=idx_ch, - exprl=exprl, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - if var_ph is not None: - var = var | var_ph - - # evaluate expression - # move tier+dots in expression to underscores (e.g. evt.foo -> evt_foo) - res = eval( - expr.replace(f"{dsp_group}.", f"{dsp_group}_") - .replace(f"{hit_group}.", f"{hit_group}_") - .replace(f"{evt_group}.", ""), - var, - ) - - # in case the expression evaluates to a single value blow it up - if (not hasattr(res, "__len__")) or (isinstance(res, str)): - return np.full(outsize, res) - - # the resulting arrays need to be 1D from the operation, - # this can only change once we support larger than two dimensional LGDOs - # ak.to_numpy() raises error if array not regular - res = ak.to_numpy(res, allow_missing=False) - - # in this method only 1D values are allowed - if res.ndim > 1: - raise ValueError( - f"expression '{expr}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" - ) - - return res - + chns + list of channel names across which expression gets evaluated (form: + ``ch``). + chns_rm + list of channels which get set to default value during evaluation. In + function mode they are removed entirely (form: ``ch``) + mode + The mode determines how the event entry is calculated across channels. + Options are: -def get_mask_from_query( - qry: str | NDArray, - length: int, - ch: str, - idx_ch: NDArray, - f_hit: str, - f_dsp: str, - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> np.ndarray: - """Evaluates a query expression and returns a mask accordingly. + - ``first_at:sorter``: aggregates across channels by returning the + expression of the channel with smallest value of sorter. + - ``last_at``: aggregates across channels by returning the expression of + the channel with largest value of sorter. + - ``sum``: aggregates by summation. + - ``any``: aggregates by logical or. + - ``all``: aggregates by logical and. + - ``keep_at_ch:ch_field``: aggregates according to passed ch_field. + - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm + index field. + - ``gather``: Channels are not combined, but result saved as + :class:`.VectorOfVectors`. - Parameters - ---------- qry - query expression. - length - length of the return mask. - ch - "rawid" of channel to be evaluated. - idx_ch - channel indices to be read. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - hit_group - LH5 root group in hit file. - dsp_group - LH5 root group in dsp file. 
- """ - - # get sub evt based query condition if needed - if isinstance(qry, str): - qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) - qry_var = find_parameters( - f_hit=f_hit, - f_dsp=f_dsp, - ch=ch, - idx_ch=idx_ch, - exprl=qry_lst, - hit_group=hit_group, - dsp_group=dsp_group, - ) - limarr = eval( - qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace( - f"{hit_group}.", f"{hit_group}_" - ), - qry_var, - ) - - # in case the expression evaluates to a single value blow it up - if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): - return np.full(len(idx_ch), limarr) - - limarr = ak.to_numpy(limarr, allow_missing=False) - if limarr.ndim > 1: - raise ValueError( - f"query '{qry}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" - ) - - # or forward the array - elif isinstance(qry, np.ndarray): - limarr = qry - - # if no condition, it must be true - else: - limarr = np.ones(length).astype(bool) - - # explicit cast to bool - if limarr.dtype != bool: - limarr = limarr.astype(bool) - - return limarr - - -def evaluate_to_first_or_last( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - sorter: tuple, - var_ph: dict = None, - defv: bool | int | float = np.nan, - is_first: bool = True, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> Array: - """Aggregates across channels by returning the expression of the channel - with value of `sorter`. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. + a query that can mask the aggregation. expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. + the expression. That can be any mathematical equation/comparison. If + `mode` is ``function``, the expression needs to be a special processing + function defined in modules (e.g. :func:`.modules.spm.get_energy`). In + the expression parameters from either hit, dsp, evt tier (from + operations performed before this one! Dictionary operations order + matters), or from the ``parameters`` field can be used. nrows - length of output array. - sorter - tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``. - var_ph - dictionary of `evt` and additional parameters and their values. + number of rows to be processed. + table + table of 'evt' tier data. + para + dictionary of parameters defined in the ``parameters`` field in the + configuration dictionary. defv - default value. - is_first - defines if sorted by smallest or largest value of `sorter` + default value of evaluation. + sorter + can be used to sort vector outputs according to sorter expression (see + :func:`evaluate_to_vector`). tcm_id_table_pattern Pattern to format tcm id values to table name in higher tiers. Must have one placeholder which is the tcm id. + evt group + LH5 root group name of evt tier. + tcm_group + LH5 root group in tcm file. dsp_group LH5 root group in dsp file. hit_group LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
""" - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) - outt = np.zeros(len(out)) - store = LH5Store() - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] - - # evaluate at channel - res = get_data_at_channel( - ch=ch, - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=ch not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) + # find parameters in evt file or in parameters + exprl = re.findall( + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr + ) + var_ph = {} + if table: + var_ph = var_ph | { + e: table[e].view_as("ak") + for e in table.keys() + if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors)) + } + if para: + var_ph = var_ph | para - # get mask from query - limarr = get_mask_from_query( - qry=qry, - length=len(res), - ch=ch, - idx_ch=idx_ch, - f_hit=f_hit, - f_dsp=f_dsp, - hit_group=hit_group, - dsp_group=dsp_group, + if mode == "function": + # evaluate expression + func, params = expr.split("(") + params = ( + params.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", "") ) + params = [ + f_hit, + f_dsp, + f_tcm, + hit_group, + dsp_group, + tcm_group, + tcm_id_table_pattern, + [x for x in chns if x not in chns_rm], + ] + [utils.num_and_pars(e, var_ph) for e in params[:-1].split(",")] - # find if sorter is in hit or dsp - t0 = store.read( - f"{ch}/{sorter[0]}/{sorter[1]}", - f_hit if f"{hit_group}" == sorter[0] else f_dsp, - idx=idx_ch, - )[0].view_as("np") - - if t0.ndim > 1: - raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array") - - if is_first: - if ch == chns[0]: - outt[:] = np.inf - - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) - - else: - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) - - return Array(nda=out) - - -def evaluate_to_scalar( - mode: str, - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> Array: - """Aggregates by summation across channels. - - Parameters - ---------- - mode - aggregation mode. - idx - tcm index array. - ids - tcm id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of dsp/hit/evt parameter tuples in expression (tier, field). - qry - query expression to mask aggregation. - nrows - length of output array - var_ph - dictionary of evt and additional parameters and their values. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - - # define dimension of output array - out = np.full(nrows, defv, dtype=type(defv)) + # load function dynamically + p, m = func.rsplit(".", 1) + met = getattr(import_module(p, package=__package__), m) + return met(*params) - for ch in chns: - # get index list for this channel to be loaded - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + else: + # check if query is either on channel basis or evt basis (and not a mix) + qry_mask = qry + if qry is not None: + if f"{evt_group}." in qry and ( + f"{hit_group}." in qry or f"{dsp_group}." in qry + ): + raise ValueError( + f"Query can't be a mix of {evt_group} tier and lower tiers." + ) - res = get_data_at_channel( - ch=ch, - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=ch not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) + # if it is an evt query we can evaluate it directly here + if table and f"{evt_group}." in qry: + qry_mask = eval(qry.replace(f"{evt_group}.", ""), table) - # get mask from query - limarr = get_mask_from_query( - qry=qry, - length=len(res), - ch=ch, - idx_ch=idx_ch, - f_hit=f_hit, - f_dsp=f_dsp, - hit_group=hit_group, - dsp_group=dsp_group, - ) + # load TCM data to define an event + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") # switch through modes - if "sum" == mode: - if res.dtype == bool: - res = res.astype(int) - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) - if "any" == mode: - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] | (res & limarr) - if "all" == mode: - if res.dtype != bool: - res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res & limarr - - return Array(nda=out) - - -def evaluate_at_channel( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns_rm: list, - expr: str, - exprl: list, - ch_comp: Array, - var_ph: dict = None, - defv: bool | int | float = np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> Array: - """Aggregates by evaluating the expression at a given channel. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - ch_comp - array of rawids at which the expression is evaluated. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - - out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) - - for ch in np.unique(ch_comp.nda.astype(int)): - # skip default value - if get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): - continue - idx_ch = idx[ids == ch] - res = get_data_at_channel( - ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) - not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) - - return Array(nda=out) - + if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): + if "keep_at_ch:" == mode[:11]: + ch_comp = table[mode[11:].replace(f"{evt_group}.", "")] + else: + ch_comp = table[mode[12:].replace(f"{evt_group}.", "")] + if isinstance(ch_comp, Array): + ch_comp = Array(nda=ids[ch_comp.view_as("np")]) + elif isinstance(ch_comp, VectorOfVectors): + ch_comp = ch_comp.view_as("ak") + ch_comp = VectorOfVectors( + array=ak.unflatten( + ids[ak.flatten(ch_comp)], ak.count(ch_comp, axis=-1) + ) + ) + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) -def evaluate_at_channel_vov( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - expr: str, - exprl: list, - ch_comp: VectorOfVectors, - chns_rm: list, - var_ph: dict = None, - defv: bool | int | float = np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> VectorOfVectors: - """Same as :func:`evaluate_at_channel` but evaluates expression at non - flat channels :class:`.VectorOfVectors`. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - ch_comp - array of "rawid"s at which the expression is evaluated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - - # blow up vov to aoesa - out = ak.Array([[] for _ in range(len(ch_comp))]) - - chns = np.unique(ch_comp.flattened_data.nda).astype(int) - ch_comp = ch_comp.view_as("ak") - - type_name = None - for ch in chns: - idx_ch = idx[ids == ch] - res = get_data_at_channel( - ch=get_table_name_by_pattern(tcm_id_table_pattern, ch), - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=get_table_name_by_pattern(tcm_id_table_pattern, ch) - not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - # see in which events the current channel is present - mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False) - cv = np.full(len(ch_comp), np.nan) - cv[idx_ch] = res - cv[~mask] = np.nan - cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None])) - - out = ak.concatenate((out, cv), axis=-1) - - if ch == chns[0]: - type_name = res.dtype - - return VectorOfVectors(ak.values_astype(out, type_name)) - - -def evaluate_to_aoesa( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, - missv=np.nan, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> ArrayOfEqualSizedArrays: - """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated - expressions of channels that fulfill a query expression. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output :class:`.VectorOfVectors`. - ch_comp - array of "rawid"s at which the expression is evaluated. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - missv - missing value. - sorter - sorts the entries in the vector according to sorter expression. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - # define dimension of output array - out = np.full((nrows, len(chns)), missv) - - i = 0 - for ch in chns: - idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] - res = get_data_at_channel( - ch=ch, - ids=ids, - idx=idx, - expr=expr, - exprl=exprl, - var_ph=var_ph, - is_evaluated=ch not in chns_rm, - f_hit=f_hit, - f_dsp=f_dsp, - defv=defv, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - # get mask from query - limarr = get_mask_from_query( - qry=qry, - length=len(res), - ch=ch, - idx_ch=idx_ch, - f_hit=f_hit, - f_dsp=f_dsp, - hit_group=hit_group, - dsp_group=dsp_group, - ) - - out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) - - i += 1 - - return ArrayOfEqualSizedArrays(nda=out) - - -def evaluate_to_vector( - idx: NDArray, - ids: NDArray, - f_hit: str, - f_dsp: str, - chns: list, - chns_rm: list, - expr: str, - exprl: list, - qry: str | NDArray, - nrows: int, - var_ph: dict = None, - defv: bool | int | float = np.nan, - sorter: str = None, - tcm_id_table_pattern: str = "ch{}", - evt_group: str = "evt", - hit_group: str = "hit", - dsp_group: str = "dsp", -) -> VectorOfVectors: - """Aggregates by returning a :class:`.VectorOfVector` of evaluated - expressions of channels that fulfill a query expression. - - Parameters - ---------- - idx - `tcm` index array. - ids - `tcm` id array. - f_hit - path to `hit` tier file. - f_dsp - path to `dsp` tier file. - chns - list of channels to be aggregated. - chns_rm - list of channels to be skipped from evaluation and set to default value. - expr - expression string to be evaluated. - exprl - list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. - qry - query expression to mask aggregation. - nrows - length of output :class:`.VectorOfVectors`. - ch_comp - array of "rawids" at which the expression is evaluated. - var_ph - dictionary of `evt` and additional parameters and their values. - defv - default value. - sorter - sorts the entries in the vector according to sorter expression. - ``ascend_by:`` results in an vector ordered ascending, - ``decend_by:`` sorts descending. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - evt_group - LH5 root group in evt file. 
- """ - out = evaluate_to_aoesa( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=expr, - exprl=exprl, - qry=qry, - nrows=nrows, - var_ph=var_ph, - defv=defv, - missv=np.nan, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ).view_as("np") - - # if a sorter is given sort accordingly - if sorter is not None: - md, fld = sorter.split(":") - s_val = evaluate_to_aoesa( - idx=idx, - ids=ids, - f_hit=f_hit, - f_dsp=f_dsp, - chns=chns, - chns_rm=chns_rm, - expr=fld, - exprl=[tuple(fld.split("."))], - qry=None, - nrows=nrows, - missv=np.nan, - tcm_id_table_pattern=tcm_id_table_pattern, - evt_group=evt_group, - hit_group=hit_group, - dsp_group=dsp_group, - ).view_as("np") - if "ascend_by" == md: - out = out[np.arange(len(out))[:, None], np.argsort(s_val)] - - elif "descend_by" == md: - out = out[np.arange(len(out))[:, None], np.argsort(-s_val)] - else: - raise ValueError( - "sorter values can only have 'ascend_by' or 'descend_by' prefixes" - ) - - return VectorOfVectors( - ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)) - ) - - -def build_evt( - f_tcm: str, - f_dsp: str, - f_hit: str, - f_evt: str, - evt_config: str | dict, - wo_mode: str = "write_safe", - evt_group: str = "evt", - tcm_group: str = "hardware_tcm_1", - dsp_group: str = "dsp", - hit_group: str = "hit", - tcm_id_table_pattern: str = "ch{}", -) -> None: - """Transform data from the `hit` and `dsp` levels which a channel sorted to a - event sorted data format. - - Parameters - ---------- - f_tcm - input LH5 file of the tcm level. - f_dsp - input LH5 file of the dsp level. - f_hit - input LH5 file of the hit level. - f_evt - name of the output file. - evt_config - name of configuration file or dictionary defining event fields. Channel - lists can be defined by importing a metadata module. - - - ``operations`` defines the fields ``name=key``, where ``channels`` - specifies the channels used to for this field (either a string or a - list of strings), - - ``aggregation_mode`` defines how the channels should be combined (see - :func:`evaluate_expression`). - - ``expression`` defnies the mathematical/special function to apply - (see :func:`evaluate_expression`), - - ``query`` defines an expression to mask the aggregation. - - ``parameters`` defines any other parameter used in expression. - - For example: - - .. 
code-block:: json - - { - "channels": { - "geds_on": ["ch1084803", "ch1084804", "ch1121600"], - "spms_on": ["ch1057600", "ch1059201", "ch1062405"], - "muon": "ch1027202", - }, - "operations": { - "energy_id":{ - "channels": "geds_on", - "aggregation_mode": "gather", - "query": "hit.cuspEmax_ctc_cal > 25", - "expression": "tcm.array_id", - "sort": "ascend_by:dsp.tp_0_est" - }, - "energy":{ - "aggregation_mode": "keep_at_ch:evt.energy_id", - "expression": "hit.cuspEmax_ctc_cal > 25" - } - "is_muon_rejected":{ - "channels": "muon", - "aggregation_mode": "any", - "expression": "dsp.wf_max>a", - "parameters": {"a":15100}, - "initial": false - }, - "multiplicity":{ - "channels": ["geds_on", "geds_no_psd", "geds_ac"], - "aggregation_mode": "sum", - "expression": "hit.cuspEmax_ctc_cal > a", - "parameters": {"a":25}, - "initial": 0 - }, - "t0":{ - "aggregation_mode": "keep_at_ch:evt.energy_id", - "expression": "dsp.tp_0_est" - }, - "lar_energy":{ - "channels": "spms_on", - "aggregation_mode": "function", - "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)" - }, - } - } - - wo_mode - writing mode. - evt group - LH5 root group name of evt tier. - tcm_group - LH5 root group in tcm file. - dsp_group - LH5 root group in dsp file. - hit_group - LH5 root group in hit file. - tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. - """ - - store = LH5Store() - tbl_cfg = evt_config - if not isinstance(tbl_cfg, (str, dict)): - raise TypeError() - if isinstance(tbl_cfg, str): - with open(tbl_cfg) as f: - tbl_cfg = json.load(f) - - if "channels" not in tbl_cfg.keys(): - raise ValueError("channel field needs to be specified in the config") - if "operations" not in tbl_cfg.keys(): - raise ValueError("operations field needs to be specified in the config") - - # check tcm_id_table_pattern validity - pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern) - if len(pattern_check) != 1: - raise ValueError( - f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid." - ) - elif "{" in pattern_check[0] or "}" in pattern_check[0]: - raise ValueError( - f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder." - ) - - if ( - get_table_name_by_pattern( - tcm_id_table_pattern, - get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]), - ) - != lh5.ls(f_hit)[0] - ): - raise ValueError( - f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!" - ) - - # create channel list according to config - # This can be either read from the meta data - # or a list of channel names - log.debug("Creating channel dictionary") - - chns = {} - - for k, v in tbl_cfg["channels"].items(): - if isinstance(v, dict): - # it is a meta module. 
module_name must exist - if "module" not in v.keys(): - raise ValueError( - "Need module_name to load channel via a meta data module" + if isinstance(ch_comp, Array): + return aggregators.evaluate_at_channel( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) - - attr = {} - # the time_key argument is set to the time key of the DSP file - # in case it is not provided by the config - if "time_key" not in v.keys(): - attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0) - - # if "None" do None - elif "None" == v["time_key"]: - attr["time_key"] = None - - # load module - p, m = v["module"].rsplit(".", 1) - met = getattr(import_module(p, package=__package__), m) - chns[k] = met(v | attr) - - elif isinstance(v, str): - chns[k] = [v] - - elif isinstance(v, list): - chns[k] = [e for e in v] - - nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm) - - table = Table(size=nrows) - - for k, v in tbl_cfg["operations"].items(): - log.debug("Processing field " + k) - - # if mode not defined in operation, it can only be an operation on the evt level. - if "aggregation_mode" not in v.keys(): - var = {} - if "parameters" in v.keys(): - var = var | v["parameters"] - res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var) - - # add attribute if present - if "lgdo_attrs" in v.keys(): - res.attrs |= v["lgdo_attrs"] - - table.add_field(k, res) - - # Else we build the event entry - else: - if "channels" not in v.keys(): - chns_e = [] - elif isinstance(v["channels"], str): - chns_e = chns[v["channels"]] - elif isinstance(v["channels"], list): - chns_e = list( - itertools.chain.from_iterable([chns[e] for e in v["channels"]]) + elif isinstance(ch_comp, VectorOfVectors): + return aggregators.evaluate_at_channel_vov( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + expr=expr, + exprl=exprl, + ch_comp=ch_comp, + chns_rm=chns_rm, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) - chns_rm = [] - if "exclude_channels" in v.keys(): - if isinstance(v["exclude_channels"], str): - chns_rm = chns[v["exclude_channels"]] - elif isinstance(v["exclude_channels"], list): - chns_rm = list( - itertools.chain.from_iterable( - [chns[e] for e in v["exclude_channels"]] - ) - ) - - pars, qry, defaultv, srter = None, None, np.nan, None - if "parameters" in v.keys(): - pars = v["parameters"] - if "query" in v.keys(): - qry = v["query"] - if "initial" in v.keys(): - defaultv = v["initial"] - if isinstance(defaultv, str) and ( - defaultv in ["np.nan", "np.inf", "-np.inf"] - ): - defaultv = eval(defaultv) - if "sort" in v.keys(): - srter = v["sort"] - - obj = evaluate_expression( - f_tcm=f_tcm, + else: + raise NotImplementedError( + type(ch_comp) + + " not supported (only Array and VectorOfVectors are supported)" + ) + elif "first_at:" in mode or "last_at:" in mode: + sorter = tuple( + re.findall( + rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", + mode.split("first_at:")[-1], + )[0] + ) + return aggregators.evaluate_to_first_or_last( + idx=idx, + ids=ids, f_hit=f_hit, f_dsp=f_dsp, - chns=chns_e, + chns=chns, chns_rm=chns_rm, - mode=v["aggregation_mode"], - expr=v["expression"], + expr=expr, + exprl=exprl, + qry=qry_mask, nrows=nrows, - table=table, - para=pars, - qry=qry, - 
defv=defaultv, - sorter=srter, + sorter=sorter, + var_ph=var_ph, + defv=defv, + is_first=True if "first_at:" in mode else False, tcm_id_table_pattern=tcm_id_table_pattern, evt_group=evt_group, hit_group=hit_group, dsp_group=dsp_group, - tcm_group=tcm_group, ) - - # add attribute if present - if "lgdo_attrs" in v.keys(): - obj.attrs |= v["lgdo_attrs"] - - table.add_field(k, obj) - - # write output fields into f_evt - if "outputs" in tbl_cfg.keys(): - if len(tbl_cfg["outputs"]) < 1: - log.warning("No output fields specified, no file will be written.") - else: - clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]] - for fld in clms_to_remove: - table.remove_field(fld, True) - store.write( - obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode + elif mode in ["sum", "any", "all"]: + return aggregators.evaluate_to_scalar( + mode=mode, + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, ) - else: - log.warning("No output fields specified, no file will be written.") - - key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0) - log.info( - f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups" - ) + elif "gather" == mode: + return aggregators.evaluate_to_vector( + idx=idx, + ids=ids, + f_hit=f_hit, + f_dsp=f_dsp, + chns=chns, + chns_rm=chns_rm, + expr=expr, + exprl=exprl, + qry=qry_mask, + nrows=nrows, + var_ph=var_ph, + defv=defv, + sorter=sorter, + tcm_id_table_pattern=tcm_id_table_pattern, + evt_group=evt_group, + hit_group=hit_group, + dsp_group=dsp_group, + ) + else: + raise ValueError(mode + " not a valid mode") diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index b72198a6f..9539c49f2 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -1,13 +1,14 @@ """ Module for special event level routines for SiPMs -functions must take as the first 4 args in order: +functions must take as the first 8 args in order: - path to the hit file -- path to the dsp file +- path to the dsp int: + pre = tcm_id_table_pattern.split("{")[0] + post = tcm_id_table_pattern.split("}")[1] + return int(ch.strip(pre).strip(post)) + + +def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str: + # check tcm_id_table_pattern validity + pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0] + if pattern_check == "" or ":" == pattern_check[0]: + return tcm_id_table_pattern.format(ch_id) + else: + raise NotImplementedError( + "Only empty placeholders with format specifications are currently implemented" + ) + + +def num_and_pars(value: str, par_dic: dict): + # function tries to convert a string to a int, float, bool + # or returns the value if value is a key in par_dic + if value in par_dic.keys(): + return par_dic[value] + try: + value = int(value) + except ValueError: + try: + value = float(value) + except ValueError: + try: + value = bool(value) + except ValueError: + pass + return value + +def find_parameters( + f_hit: str, + f_dsp: str, + ch: str, + idx_ch: NDArray, + exprl: list, + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> dict: + """Wraps :func:`load_vars_to_nda` to return parameters from `hit` and `dsp` + tiers. + + Parameters + ---------- + f_hit + path to `hit` tier file. 
+ f_dsp + path to `dsp` tier file. + ch + "rawid" in the tiers. + idx_ch + index array of entries to be read from files. + exprl + list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + """ + + # find fields in either dsp, hit + dsp_flds = [e[1] for e in exprl if e[0] == dsp_group] + hit_flds = [e[1] for e in exprl if e[0] == hit_group] + + store = LH5Store() + hit_dict, dsp_dict = {}, {} + if len(hit_flds) > 0: + hit_ak = store.read( + f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch + )[0].view_as("ak") + hit_dict = dict( + zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak)) + ) + if len(dsp_flds) > 0: + dsp_ak = store.read( + f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch + )[0].view_as("ak") + dsp_dict = dict( + zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak)) + ) + + return hit_dict | dsp_dict + + +def get_data_at_channel( + ch: str, + ids: NDArray, + idx: NDArray, + expr: str, + exprl: list, + var_ph: dict, + is_evaluated: bool, + f_hit: str, + f_dsp: str, + defv, + tcm_id_table_pattern: str = "ch{}", + evt_group: str = "evt", + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> np.ndarray: + """Evaluates an expression and returns the result. + + Parameters + ---------- + ch + "rawid" of channel to be evaluated. + idx + `tcm` index array. + ids + `tcm` id array. + expr + expression to be evaluated. + exprl + list of parameter-tuples ``(root_group, field)`` found in the expression. + var_ph + dict of additional parameters that are not channel dependent. + is_evaluated + if false, the expression does not get evaluated but an array of default + values is returned. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + defv + default value. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. + dsp_group + LH5 root group in dsp file. + hit_group + LH5 root group in hit file. + evt_group + LH5 root group in evt file. + """ + + # get index list for this channel to be loaded + idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + outsize = len(idx_ch) + + if not is_evaluated: + res = np.full(outsize, defv, dtype=type(defv)) + elif "tcm.array_id" == expr: + res = np.full( + outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int + ) + elif "tcm.index" == expr: + res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0] + else: + var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=exprl, + hit_group=hit_group, + dsp_group=dsp_group, + ) + + if var_ph is not None: + var = var | var_ph + + # evaluate expression + # move tier+dots in expression to underscores (e.g. 
evt.foo -> evt_foo) + res = eval( + expr.replace(f"{dsp_group}.", f"{dsp_group}_") + .replace(f"{hit_group}.", f"{hit_group}_") + .replace(f"{evt_group}.", ""), + var, + ) + + # in case the expression evaluates to a single value blow it up + if (not hasattr(res, "__len__")) or (isinstance(res, str)): + return np.full(outsize, res) + + # the resulting arrays need to be 1D from the operation, + # this can only change once we support larger than two dimensional LGDOs + # ak.to_numpy() raises error if array not regular + res = ak.to_numpy(res, allow_missing=False) + + # in this method only 1D values are allowed + if res.ndim > 1: + raise ValueError( + f"expression '{expr}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) + + return res + + +def get_mask_from_query( + qry: str | NDArray, + length: int, + ch: str, + idx_ch: NDArray, + f_hit: str, + f_dsp: str, + hit_group: str = "hit", + dsp_group: str = "dsp", +) -> np.ndarray: + """Evaluates a query expression and returns a mask accordingly. + + Parameters + ---------- + qry + query expression. + length + length of the return mask. + ch + "rawid" of channel to be evaluated. + idx_ch + channel indices to be read. + f_hit + path to `hit` tier file. + f_dsp + path to `dsp` tier file. + hit_group + LH5 root group in hit file. + dsp_group + LH5 root group in dsp file. + """ + + # get sub evt based query condition if needed + if isinstance(qry, str): + qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry) + qry_var = find_parameters( + f_hit=f_hit, + f_dsp=f_dsp, + ch=ch, + idx_ch=idx_ch, + exprl=qry_lst, + hit_group=hit_group, + dsp_group=dsp_group, + ) + limarr = eval( + qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace( + f"{hit_group}.", f"{hit_group}_" + ), + qry_var, + ) + + # in case the expression evaluates to a single value blow it up + if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)): + return np.full(len(idx_ch), limarr) + + limarr = ak.to_numpy(limarr, allow_missing=False) + if limarr.ndim > 1: + raise ValueError( + f"query '{qry}' must return 1D array. If you are using VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction functions to reduce the dimension" + ) + + # or forward the array + elif isinstance(qry, np.ndarray): + limarr = qry + + # if no condition, it must be true + else: + limarr = np.ones(length).astype(bool) + + # explicit cast to bool + if limarr.dtype != bool: + limarr = limarr.astype(bool) + + return limarr \ No newline at end of file From 2c5b9b49cef22c7566bafd112b6e07049865f6bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 26 Jan 2024 16:02:42 +0100 Subject: [PATCH 160/191] agnostify also skm tier --- src/pygama/evt/aggregators.py | 13 +++++++---- src/pygama/evt/utils.py | 10 +++++--- src/pygama/skm/build_skm.py | 43 ++++++++++++++++++++++++----------- 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index f9131ed96..b95db1b5e 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -3,15 +3,16 @@ """ from __future__ import annotations -import re -import numpy as np -from numpy.typing import NDArray + import awkward as ak +import numpy as np +from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors, lh5 from lgdo.lh5 import LH5Store -from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5 +from numpy.typing import NDArray from . 
import utils + def evaluate_to_first_or_last( idx: NDArray, ids: NDArray, @@ -308,7 +309,9 @@ def evaluate_at_channel( for ch in np.unique(ch_comp.nda.astype(int)): # skip default value - if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(f_hit): + if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls( + f_hit + ): continue idx_ch = idx[ids == ch] res = utils.get_data_at_channel( diff --git a/src/pygama/evt/utils.py b/src/pygama/evt/utils.py index fe1e35e94..175cd868a 100644 --- a/src/pygama/evt/utils.py +++ b/src/pygama/evt/utils.py @@ -3,11 +3,14 @@ """ from __future__ import annotations + import re -import numpy as np -from numpy.typing import NDArray + import awkward as ak +import numpy as np from lgdo.lh5 import LH5Store +from numpy.typing import NDArray + def get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int: pre = tcm_id_table_pattern.split("{")[0] @@ -43,6 +46,7 @@ def num_and_pars(value: str, par_dic: dict): pass return value + def find_parameters( f_hit: str, f_dsp: str, @@ -275,4 +279,4 @@ def get_mask_from_query( if limarr.dtype != bool: limarr = limarr.astype(bool) - return limarr \ No newline at end of file + return limarr diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 049012985..e1f9add39 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -14,6 +14,8 @@ from lgdo import Array, Table, lh5 from lgdo.lh5 import LH5Store +from pygama.evt import utils + log = logging.getLogger(__name__) @@ -25,7 +27,12 @@ def build_skm( f_skm: str, skm_conf: dict | str, wo_mode="w", - group: str = "/skm/", + skm_group: str = "skm", + evt_group: str = "evt", + tcm_group: str = "hardware_tcm_1", + dsp_group: str = "dsp", + hit_group: str = "hit", + tcm_id_table_pattern: str = "ch{}", ) -> None: """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). @@ -89,10 +96,21 @@ def build_skm( - ``append`` or ``a``: append to file. - ``overwrite`` or ``o``: replaces existing file. - group - LH5 root group name (only used if ``skim_format`` is ``lh5``). + skm_group + skm LH5 root group name. + evt_group + evt LH5 root group name. + hit_group + hit LH5 root group name. + dsp_group + dsp LH5 root group name. + tcm_group + tcm LH5 root group name. + tcm_id_table_pattern + Pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the tcm id. 
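A minimal sketch of how such a pattern maps between table names and `tcm` ids via the helpers in ``pygama.evt.utils``, assuming the default ``ch{}`` pattern (channel name is illustrative):

.. code-block:: python

    from pygama.evt.utils import get_tcm_id_by_pattern, get_table_name_by_pattern

    get_tcm_id_by_pattern("ch{}", "ch1084803")    # -> 1084803
    get_table_name_by_pattern("ch{}", 1084803)    # -> "ch1084803"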
""" - f_dict = {"evt": f_evt, "hit": f_hit, "dsp": f_dsp, "tcm": f_tcm} + f_dict = {evt_group: f_evt, hit_group: f_hit, dsp_group: f_dsp, tcm_group: f_tcm} log = logging.getLogger(__name__) log.debug(f"I am skimming {len(f_evt) if isinstance(f_evt,list) else 1} files") @@ -122,11 +140,9 @@ def build_skm( miss_val = eval(miss_val) fw_fld = tbl_cfg["operations"][op]["forward_field"].split(".") - if fw_fld[0] not in ["evt", "hit", "dsp", "tcm"]: - raise ValueError(f"{fw_fld[0]} is not a valid tier") # load object if from evt tier - if fw_fld[0] == "evt": + if fw_fld[0] == evt_group: obj = store.read(f"/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]])[ 0 ].view_as("ak") @@ -145,10 +161,10 @@ def build_skm( obj = ak.Array([[] for x in range(len(tcm_idx))]) # load TCM data to define an event - ids = store.read("hardware_tcm_1/array_id", f_tcm)[0].view_as("ak") + ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("ak") ids = ak.unflatten(ids[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) - idx = store.read("hardware_tcm_1/array_idx", f_tcm)[0].view_as("ak") + idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("ak") idx = ak.unflatten(idx[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1)) if "tcm.array_id" == tbl_cfg["operations"][op]["forward_field"]: @@ -167,13 +183,14 @@ def build_skm( ct_idx = ak.count(ch_idx, axis=-1) fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) - if f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}" not in lh5.ls( - f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/" + if ( + f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld[0]}/{fw_fld[1]}" + not in lh5.ls(f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/") ): och = Array(nda=np.full(len(fl_idx), miss_val)) else: och, _ = store.read( - f"ch{ch}/{fw_fld[0]}/{fw_fld[1]}", + f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]], idx=fl_idx, ) @@ -216,4 +233,4 @@ def build_skm( raise FileExistsError(f"Write_safe mode: {f_skm} exists.") wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of" - store.write(obj=table, name=group, lh5_file=f_skm, wo_mode=wo) + store.write(obj=table, name=f"/{skm_group}/", lh5_file=f_skm, wo_mode=wo) From 013dc04c183a49f597a1dbd9e4d30d55acd725ae Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 30 Jan 2024 09:11:19 +0100 Subject: [PATCH 161/191] updated etc classifier --- src/pygama/evt/modules/spm.py | 11 +++++++++++ tests/evt/configs/module-test-evt-config.json | 2 +- tests/evt/configs/module-test-t0-vov-evt-config.json | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py index 9539c49f2..2dc5a4290 100644 --- a/src/pygama/evt/modules/spm.py +++ b/src/pygama/evt/modules/spm.py @@ -368,6 +368,7 @@ def get_majority_dplms( # trail = 1: Singlet window = [t_first_lar_pulse, t_first_lar_pulse+ swin] # trail = 2: Like trail = 1, but t_first_lar_pulse <= tge is ensured # min_first_pls_ene sets the minimum energy of the first pulse (only used in trail > 0) +# max_per_channel, maximum number of pes a channel is allowed to have, if above it gets excluded def get_etc( f_hit, f_dsp, @@ -385,6 +386,7 @@ def get_etc( swin, trail, min_first_pls_ene, + max_per_channel, ) -> Array: # load TCM data to define an event store = LH5Store() @@ -419,9 +421,18 @@ def get_etc( mask = get_spm_mask(lim, tge, tmin, tmax, pe, times) pe = pe[mask] + + # max pe mask + max_pe_mask = ak.nansum(pe, axis=-1) < max_per_channel + pe = ak.drop_none( + 
ak.nan_to_none(ak.where(max_pe_mask, pe, ak.Array([[np.nan]]))) + ) pe_lst.append(pe) times = times[mask] * 16 + times = ak.drop_none( + ak.nan_to_none(ak.where(max_pe_mask, times, ak.Array([[np.nan]]))) + ) time_lst.append(times) pe_all = ak.concatenate(pe_lst, axis=-1) diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json index 6aba3bf75..0daa94658 100644 --- a/tests/evt/configs/module-test-evt-config.json +++ b/tests/evt/configs/module-test-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0,50)" }, "lar_energy_dplms": { "channels": "spms_on", diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json index 5d1c6f256..cda042337 100644 --- a/tests/evt/configs/module-test-t0-vov-evt-config.json +++ b/tests/evt/configs/module-test-t0-vov-evt-config.json @@ -51,7 +51,7 @@ "lar_classifier": { "channels": "spms_on", "aggregation_mode": "function", - "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0)" + "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0,50)" }, "lar_energy_dplms": { "channels": "spms_on", From 78134ca398df9303aa3326a18cee9877b4dd1685 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 30 Jan 2024 10:46:39 +0100 Subject: [PATCH 162/191] Docstring cosmetics --- src/pygama/evt/aggregators.py | 68 +++++++++++++++++------------------ src/pygama/evt/build_evt.py | 32 ++++++++--------- src/pygama/skm/build_skm.py | 60 +++++++++++++++---------------- 3 files changed, 80 insertions(+), 80 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index b95db1b5e..993c0ffe6 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -67,14 +67,14 @@ def evaluate_to_first_or_last( is_first defines if sorted by smallest or largest value of `sorter` tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # define dimension of output array @@ -167,9 +167,9 @@ def evaluate_to_scalar( mode aggregation mode. idx - tcm index array. + `tcm` index array. ids - tcm id array. + `tcm` id array. f_hit path to `hit` tier file. f_dsp @@ -181,24 +181,24 @@ def evaluate_to_scalar( expr expression string to be evaluated. exprl - list of dsp/hit/evt parameter tuples in expression (tier, field). + list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``. qry query expression to mask aggregation. nrows length of output array var_ph - dictionary of evt and additional parameters and their values. + dictionary of `evt` and additional parameters and their values. defv default value. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. 
+ LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # define dimension of output array @@ -295,14 +295,14 @@ def evaluate_at_channel( defv default value. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ out = np.full(len(ch_comp.nda), defv, dtype=type(defv)) @@ -379,14 +379,14 @@ def evaluate_at_channel_vov( defv default value. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # blow up vov to aoesa @@ -486,14 +486,14 @@ def evaluate_to_aoesa( sorter sorts the entries in the vector according to sorter expression. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ # define dimension of output array out = np.full((nrows, len(chns)), missv) @@ -592,14 +592,14 @@ def evaluate_to_vector( ``ascend_by:`` results in an vector ordered ascending, ``decend_by:`` sorts descending. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. evt_group - LH5 root group in evt file. + LH5 root group in `evt` file. """ out = evaluate_to_aoesa( idx=idx, diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index e0c0dafb3..66489c38c 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -39,11 +39,11 @@ def build_evt( Parameters ---------- f_tcm - input LH5 file of the tcm level. + input LH5 file of the `tcm` level. f_dsp - input LH5 file of the dsp level. + input LH5 file of the `dsp` level. f_hit - input LH5 file of the hit level. + input LH5 file of the `hit` level. f_evt name of the output file. evt_config @@ -111,16 +111,16 @@ def build_evt( wo_mode writing mode. evt group - LH5 root group name of evt tier. + LH5 root group name of `evt` tier. tcm_group - LH5 root group in tcm file. + LH5 root group in `tcm` file. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. 
tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must + have one placeholder which is the `tcm` id. """ store = LH5Store() @@ -362,7 +362,7 @@ def evaluate_expression( nrows number of rows to be processed. table - table of 'evt' tier data. + table of `evt` tier data. para dictionary of parameters defined in the ``parameters`` field in the configuration dictionary. @@ -372,16 +372,16 @@ def evaluate_expression( can be used to sort vector outputs according to sorter expression (see :func:`evaluate_to_vector`). tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format tcm id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. evt group - LH5 root group name of evt tier. + LH5 root group name of `evt` tier. tcm_group - LH5 root group in tcm file. + LH5 root group in `tcm` file. dsp_group - LH5 root group in dsp file. + LH5 root group in `dsp` file. hit_group - LH5 root group in hit file. + LH5 root group in `hit` file. """ store = LH5Store() diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index e1f9add39..a92619b83 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -34,7 +34,7 @@ def build_skm( hit_group: str = "hit", tcm_id_table_pattern: str = "ch{}", ) -> None: - """Builds a skimmed file from a (set) of evt/hit/dsp tier file(s). + """Builds a skimmed file from a (set) of `evt/hit/dsp` tier file(s). Parameters ---------- @@ -65,28 +65,28 @@ def build_skm( .. code-block:: json - { - "multiplicity": 2, - "postfixes":["","aux"], - "operations": { - "timestamp":{ - "forward_field": "evt.timestamp" - }, - "multiplicity":{ - "forward_field": "evt.multiplicity" - }, - "energy":{ - "forward_field": "hit.cuspEmax_ctc_cal", - "missing_value": "np.nan", - "tcm_idx": "evt.energy_idx" - }, - "energy_id":{ - "forward_field": "tcm.array_id", - "missing_value": 0, - "tcm_idx": "evt.energy_idx" - } - } - } + { + "multiplicity": 2, + "postfixes":["", "aux"], + "operations": { + "timestamp":{ + "forward_field": "evt.timestamp" + }, + "multiplicity":{ + "forward_field": "evt.multiplicity" + }, + "energy":{ + "forward_field": "hit.cuspEmax_ctc_cal", + "missing_value": "np.nan", + "tcm_idx": "evt.energy_idx" + }, + "energy_id":{ + "forward_field": "tcm.array_id", + "missing_value": 0, + "tcm_idx": "evt.energy_idx" + } + } + } wo_mode writing mode. @@ -97,18 +97,18 @@ def build_skm( - ``overwrite`` or ``o``: replaces existing file. skm_group - skm LH5 root group name. + `skm` LH5 root group name. evt_group - evt LH5 root group name. + `evt` LH5 root group name. hit_group - hit LH5 root group name. + `hit` LH5 root group name. dsp_group - dsp LH5 root group name. + `dsp` LH5 root group name. tcm_group - tcm LH5 root group name. + `tcm` LH5 root group name. tcm_id_table_pattern - Pattern to format tcm id values to table name in higher tiers. Must have one - placeholder which is the tcm id. + pattern to format `tcm` id values to table name in higher tiers. Must have one + placeholder which is the `tcm` id. 
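The ``evt.energy_idx`` field referenced by ``tcm_idx`` in the example above is not built by ``build_skm`` itself; it would come from an `evt` tier operation gathering TCM indices. A sketch of such an operation (the operation name, query threshold and sorter are illustrative):

.. code-block:: json

    {
      "energy_idx": {
        "channels": "geds_on",
        "aggregation_mode": "gather",
        "query": "hit.cuspEmax_ctc_cal > 25",
        "expression": "tcm.index",
        "sort": "ascend_by:dsp.tp_0_est"
      }
    }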
""" f_dict = {evt_group: f_evt, hit_group: f_hit, dsp_group: f_dsp, tcm_group: f_tcm} log = logging.getLogger(__name__) From 79d47bdfb0bf4b413b1caf1705dd8563cc86c0a1 Mon Sep 17 00:00:00 2001 From: ggmarshall <72088559+ggmarshall@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:15:02 +0000 Subject: [PATCH 163/191] Fix for aggregators in `evt` to index output with `evt_idx` instead of `ch_idx` (#551) * fix for first to last for cal data where different rows for each table * fix filedb to use new lgdo * evaluate_at_channel_vov does not need cumulength argument, fixed cumulengths _> cumulengt, add cumulength to evaluate_to_vector and fix int channels * searchsorted needs to be 'right' to match cumulative lengths, updated evaluate_at_channel_vov to use evt_ids_ch --- src/pygama/evt/aggregators.py | 54 ++++++++++++++++++++++++++++------- src/pygama/evt/build_evt.py | 8 ++++++ src/pygama/flow/file_db.py | 3 +- 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index 993c0ffe6..3f45b5a6e 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -14,6 +14,7 @@ def evaluate_to_first_or_last( + cumulength: NDArray, idx: NDArray, ids: NDArray, f_hit: str, @@ -86,6 +87,11 @@ def evaluate_to_first_or_last( for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + evt_ids_ch = np.searchsorted( + cumulength, + np.where(ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0], + "right", + ) # evaluate at channel res = utils.get_data_at_channel( @@ -131,18 +137,27 @@ def evaluate_to_first_or_last( if ch == chns[0]: outt[:] = np.inf - out[idx_ch] = np.where((t0 < outt) & (limarr), res, out[idx_ch]) - outt[idx_ch] = np.where((t0 < outt) & (limarr), t0, outt[idx_ch]) + out[evt_ids_ch] = np.where( + (t0 < outt[evt_ids_ch]) & (limarr), res, out[evt_ids_ch] + ) + outt[evt_ids_ch] = np.where( + (t0 < outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch] + ) else: - out[idx_ch] = np.where((t0 > outt) & (limarr), res, out[idx_ch]) - outt[idx_ch] = np.where((t0 > outt) & (limarr), t0, outt[idx_ch]) + out[evt_ids_ch] = np.where( + (t0 > outt[evt_ids_ch]) & (limarr), res, out[evt_ids_ch] + ) + outt[evt_ids_ch] = np.where( + (t0 > outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch] + ) return Array(nda=out) def evaluate_to_scalar( mode: str, + cumulength: NDArray, idx: NDArray, ids: NDArray, f_hit: str, @@ -207,6 +222,11 @@ def evaluate_to_scalar( for ch in chns: # get index list for this channel to be loaded idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + evt_ids_ch = np.searchsorted( + cumulength, + np.where(ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0], + "right", + ) res = utils.get_data_at_channel( ch=ch, @@ -241,20 +261,21 @@ def evaluate_to_scalar( if "sum" == mode: if res.dtype == bool: res = res.astype(int) - out[idx_ch] = np.where(limarr, res + out[idx_ch], out[idx_ch]) + out[evt_ids_ch] = np.where(limarr, res + out[evt_ids_ch], out[evt_ids_ch]) if "any" == mode: if res.dtype != bool: res = res.astype(bool) - out[idx_ch] = out[idx_ch] | (res & limarr) + out[evt_ids_ch] = out[evt_ids_ch] | (res & limarr) if "all" == mode: if res.dtype != bool: res = res.astype(bool) - out[idx_ch] = out[idx_ch] & res & limarr + out[evt_ids_ch] = out[evt_ids_ch] & res & limarr return Array(nda=out) def evaluate_at_channel( + cumulength: NDArray, idx: NDArray, ids: NDArray, f_hit: str, @@ -314,6 
+335,7 @@ def evaluate_at_channel( ): continue idx_ch = idx[ids == ch] + evt_ids_ch = np.searchsorted(cumulength, np.where(ids == ch)[0], "right") res = utils.get_data_at_channel( ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch), ids=ids, @@ -332,12 +354,13 @@ def evaluate_at_channel( dsp_group=dsp_group, ) - out[idx_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[idx_ch]) + out[evt_ids_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[evt_ids_ch]) return Array(nda=out) def evaluate_at_channel_vov( + cumulength: NDArray, idx: NDArray, ids: NDArray, f_hit: str, @@ -397,7 +420,7 @@ def evaluate_at_channel_vov( type_name = None for ch in chns: - idx_ch = idx[ids == ch] + evt_ids_ch = np.searchsorted(cumulength, np.where(ids == ch)[0], "right") res = utils.get_data_at_channel( ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch), ids=ids, @@ -419,7 +442,7 @@ def evaluate_at_channel_vov( # see in which events the current channel is present mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False) cv = np.full(len(ch_comp), np.nan) - cv[idx_ch] = res + cv[evt_ids_ch] = res cv[~mask] = np.nan cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None])) @@ -432,6 +455,7 @@ def evaluate_at_channel_vov( def evaluate_to_aoesa( + cumulength: NDArray, idx: NDArray, ids: NDArray, f_hit: str, @@ -501,6 +525,11 @@ def evaluate_to_aoesa( i = 0 for ch in chns: idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] + evt_ids_ch = np.searchsorted( + cumulength, + np.where(ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0], + "right", + ) res = utils.get_data_at_channel( ch=ch, ids=ids, @@ -530,7 +559,7 @@ def evaluate_to_aoesa( dsp_group=dsp_group, ) - out[idx_ch, i] = np.where(limarr, res, out[idx_ch, i]) + out[evt_ids_ch, i] = np.where(limarr, res, out[evt_ids_ch, i]) i += 1 @@ -538,6 +567,7 @@ def evaluate_to_aoesa( def evaluate_to_vector( + cumulength: NDArray, idx: NDArray, ids: NDArray, f_hit: str, @@ -602,6 +632,7 @@ def evaluate_to_vector( LH5 root group in `evt` file. 
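A small `numpy` sketch of the indexing this patch introduces (values are illustrative): with cumulative lengths ``[2, 5, 6]``, i.e. three events holding 2, 3 and 1 hits, the event index of each flat hit position is recovered with a right-sided ``searchsorted``:

.. code-block:: python

    import numpy as np

    cumulength = np.array([2, 5, 6])   # exclusive end of each event's hits
    hit_pos = np.arange(6)             # flat hit positions 0..5
    np.searchsorted(cumulength, hit_pos, "right")  # -> [0, 0, 1, 1, 1, 2]
    # with "left", the boundary hits at positions 2 and 5 would be
    # assigned to the wrong events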
""" out = evaluate_to_aoesa( + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, @@ -625,6 +656,7 @@ def evaluate_to_vector( if sorter is not None: md, fld = sorter.split(":") s_val = evaluate_to_aoesa( + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 66489c38c..61296f9ee 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -442,6 +442,9 @@ def evaluate_expression( # load TCM data to define an event ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np") idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np") + cumulength = store.read(f"/{tcm_group}/cumulative_length", f_tcm)[0].view_as( + "np" + ) # switch through modes if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])): @@ -466,6 +469,7 @@ def evaluate_expression( if isinstance(ch_comp, Array): return aggregators.evaluate_at_channel( + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, @@ -483,6 +487,7 @@ def evaluate_expression( ) elif isinstance(ch_comp, VectorOfVectors): return aggregators.evaluate_at_channel_vov( + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, @@ -511,6 +516,7 @@ def evaluate_expression( )[0] ) return aggregators.evaluate_to_first_or_last( + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, @@ -533,6 +539,7 @@ def evaluate_expression( elif mode in ["sum", "any", "all"]: return aggregators.evaluate_to_scalar( mode=mode, + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, @@ -552,6 +559,7 @@ def evaluate_expression( ) elif "gather" == mode: return aggregators.evaluate_to_vector( + cumulength=cumulength, idx=idx, ids=ids, f_hit=f_hit, diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py index 66545c419..fdca65b2d 100644 --- a/src/pygama/flow/file_db.py +++ b/src/pygama/flow/file_db.py @@ -11,7 +11,8 @@ import h5py import numpy as np import pandas as pd -from lgdo.lh5.store import LH5Store, ls +from lgdo.lh5 import ls +from lgdo.lh5.store import LH5Store from lgdo.lh5.utils import expand_path, expand_vars from lgdo.types import Array, Scalar, VectorOfVectors from parse import parse From 1e3f3d7b3689e5db8911db72ee5e951e4f4a088e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 11:55:40 +0000 Subject: [PATCH 164/191] Bump codecov/codecov-action from 3 to 4 Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 3 to 4. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v3...v4) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot]
---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e5e817249..535cff14d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -57,7 +57,7 @@ jobs:
         python -m pip install --upgrade .[test]
         python -m pytest --cov=pygama --cov-report=xml
       - name: Upload Coverage to codecov.io
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          fail_ci_if_error: false

From 3738fc15b12f6c0695d80eaf758d7c4926658819 Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Thu, 1 Feb 2024 18:05:14 +0100
Subject: [PATCH 165/191] change print to log

---
 src/pygama/math/histogram.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pygama/math/histogram.py b/src/pygama/math/histogram.py
index 5391c9b23..b62bd4da1 100644
--- a/src/pygama/math/histogram.py
+++ b/src/pygama/math/histogram.py
@@ -318,7 +318,7 @@ def get_fwfm(fraction, hist, bins, var=None, mx=None, dmx=0, bl=0, dbl=0, method
     # interpolate between the two bins that cross the [fraction] line
     # works well for high stats
     if bin_lo < 1 or bin_hi >= len(hist)-1:
-        print(f"get_fwhm: can't interpolate ({bin_lo}, {bin_hi})")
+        log.debug(f"get_fwhm: can't interpolate ({bin_lo}, {bin_hi})")
         return 0, 0

     val_f = bl + fraction*(mx-bl)
@@ -403,7 +403,7 @@ def get_fwfm(fraction, hist, bins, var=None, mx=None, dmx=0, bl=0, dbl=0, method
         return x_hi - x_lo, np.sqrt(dxl2 + dxh2)

     else:
-        print(f"get_fwhm: unrecognized method {method}")
+        log.debug(f"get_fwhm: unrecognized method {method}")
         return 0, 0


From abbe6866c160b430dd5d74ef3a14ee0cc726821f Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Thu, 1 Feb 2024 18:05:52 +0100
Subject: [PATCH 166/191] fix table import for new version

---
 src/pygama/pargen/cuts.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/pygama/pargen/cuts.py b/src/pygama/pargen/cuts.py
index c4ab158d7..258797bd9 100644
--- a/src/pygama/pargen/cuts.py
+++ b/src/pygama/pargen/cuts.py
@@ -10,6 +10,7 @@
 import os

 import lgdo.lh5 as lh5
+from lgdo.types import Table
 import numpy as np
 import pandas as pd
 from scipy import stats
@@ -51,7 +52,7 @@ def generate_cuts(
     output_dict = {}
     if isinstance(data, pd.DataFrame):
         pass
-    elif isinstance(data, lh5.Table):
+    elif isinstance(data, Table):
         data = {entry: data[entry].nda for entry in get_keys(data, parameters)}
         data = pd.DataFrame.from_dict(data)
     elif isinstance(data, dict):
@@ -204,7 +205,7 @@ def get_cut_indexes(
     keys = cut_dict.keys()
     if isinstance(all_data, pd.DataFrame):
         pass
-    elif isinstance(all_data, lh5.Table):
+    elif isinstance(all_data, Table):
         cut_keys = list(cut_dict)
         cut_keys.append(energy_param)
         all_data = {

From 5a0df3e04b1b6667bf29de0499fc2e421011a30d Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Thu, 1 Feb 2024 18:06:29 +0100
Subject: [PATCH 167/191] change load_dfs to new read and suppress convergence
 warnings

---
 src/pygama/pargen/energy_optimisation.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py
index ecad4bbd7..c53adc5c9 100644
--- a/src/pygama/pargen/energy_optimisation.py
+++ b/src/pygama/pargen/energy_optimisation.py
@@ -25,6 +25,8 @@
 from scipy.stats import chisquare, norm
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import RBF, ConstantKernel
+from 
sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import ignore_warnings import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf @@ -922,8 +924,7 @@ def event_selection( if not isinstance(kev_widths, list): kev_widths = [kev_widths] - sto = lh5.LH5Store() - df = lh5.load_dfs(raw_files, ["daqenergy", "timestamp"], lh5_path) + df = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "timestamp"])[0].view_as('pd') if pulser_mask is None: pulser_props = cts.find_pulser_properties(df, energy="daqenergy") @@ -1067,7 +1068,7 @@ def event_selection( log.warning("Less than half number of specified events found") elif len(peak_ids[final_mask]) < 0.1 * n_events: log.error("Less than 10% number of specified events found") - out_events = np.unique(np.array(out_events).flatten()) + out_events = np.unique(np.concatenate(out_events)) sort_index = np.argsort(np.concatenate(final_events)) idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events]) return out_events, idx_list @@ -1381,6 +1382,7 @@ def get_first_point(self): self.optimal_ei = None return self.optimal_x, self.optimal_ei + @ignore_warnings(category=ConvergenceWarning) def iterate_values(self): nan_idxs = np.isnan(self.y_init) self.gauss_pr.fit(self.x_init[~nan_idxs], np.array(self.y_init)[~nan_idxs]) @@ -1451,6 +1453,7 @@ def get_best_vals(self): out_dict[name][parameter] = value_str return out_dict + @ignore_warnings(category=ConvergenceWarning) def plot(self, init_samples=None): nan_idxs = np.isnan(self.y_init) fail_idxs = np.isnan(self.yerr_init) @@ -1557,6 +1560,7 @@ def plot(self, init_samples=None): plt.close() return fig + @ignore_warnings(category=ConvergenceWarning) def plot_acq(self, init_samples=None): nan_idxs = np.isnan(self.y_init) self.gauss_pr.fit(self.x_init[~nan_idxs], np.array(self.y_init)[~nan_idxs]) From 10e403c40c5442f36cec97a317d7bccfbf840c20 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 1 Feb 2024 18:07:00 +0100 Subject: [PATCH 168/191] change load_dfs to new read and change plotting to no longer give warnings --- src/pygama/pargen/extract_tau.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index d35473715..5c7169a69 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -26,7 +26,7 @@ import pygama.pargen.energy_optimisation as om log = logging.getLogger(__name__) - +sto = lh5.LH5Store() def load_data( raw_file: list[str], @@ -36,8 +36,7 @@ def load_data( threshold: int = 5000, wf_field: str = "waveform", ) -> lgdo.Table: - sto = lh5.LH5Store() - df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], lh5_path) + df = sto.read(lh5_path, raw_file, field_mask=["daqenergy", "timestamp"])[0].view_as('pd') if pulser_mask is None: pulser_props = cts.find_pulser_properties(df, energy="daqenergy") @@ -142,8 +141,8 @@ def get_decay_constant( ) axins.axvline(high_bin, color="red") axins.set_xlim(bins[in_min], bins[in_max]) - labels = ax.get_xticklabels() - ax.set_xticklabels(labels=labels, rotation=45) + ax.set_xticks(ax.get_xticks()) + ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45) out_plot_dict["slope"] = fig if display > 1: plt.show() From a3f8a1c3470dd5443c0d4a4b7a58e0fe7a90fb1a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 1 Feb 2024 18:07:40 +0100 Subject: [PATCH 169/191] fix tcm read for new lgdo --- src/pygama/pargen/utils.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff 
--git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 5c8f8c101..c41c1d69f 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -9,6 +9,7 @@ from lgdo import Table, lh5 log = logging.getLogger(__name__) +sto=lh5.LH5Store() def return_nans(input): @@ -50,8 +51,6 @@ def load_data( Loads in the A/E parameters needed and applies calibration constants to energy """ - sto = lh5.LH5Store() - out_df = pd.DataFrame(columns=params) if isinstance(files, dict): @@ -142,14 +141,13 @@ def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold): mask = np.append(mask, file_mask) ids = np.where(mask)[0] else: - data = lh5.load_dfs(tcm_file, ["array_id", "array_idx"], "hardware_tcm_1") - cum_length = lh5.load_nda(tcm_file, ["cumulative_length"], "hardware_tcm_1")[ - "cumulative_length" - ] - cum_length = np.append(np.array([0]), cum_length) - n_channels = np.diff(cum_length) - evt_numbers = np.repeat(np.arange(0, len(cum_length) - 1), np.diff(cum_length)) - evt_mult = np.repeat(np.diff(cum_length), np.diff(cum_length)) + data = pd.DataFrame({"array_id":sto.read("hardware_tcm_1/array_id", tcm_file)[0].view_as('np'), + "array_idx":sto.read("hardware_tcm_1/array_idx", tcm_file)[0].view_as('np')}) + cumulength = sto.read("hardware_tcm_1/cumulative_length", tcm_file)[0].view_as('np') + cumulength = np.append(np.array([0]), cumulength) + n_channels = np.diff(cumulength) + evt_numbers = np.repeat(np.arange(0, len(cumulength) - 1), np.diff(cumulength)) + evt_mult = np.repeat(np.diff(cumulength), np.diff(cumulength)) data["evt_number"] = evt_numbers data["evt_mult"] = evt_mult high_mult_events = np.where(n_channels > multiplicity_threshold)[0] From 2dee12b809a975c98eba0ff587580a46a660bf9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:31:19 +0000 Subject: [PATCH 170/191] style: pre-commit fixes --- src/pygama/pargen/cuts.py | 2 +- src/pygama/pargen/energy_optimisation.py | 6 ++++-- src/pygama/pargen/extract_tau.py | 5 ++++- src/pygama/pargen/utils.py | 18 ++++++++++++++---- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/pygama/pargen/cuts.py b/src/pygama/pargen/cuts.py index 258797bd9..638199f64 100644 --- a/src/pygama/pargen/cuts.py +++ b/src/pygama/pargen/cuts.py @@ -10,9 +10,9 @@ import os import lgdo.lh5 as lh5 -from lgdo.types import Table import numpy as np import pandas as pd +from lgdo.types import Table from scipy import stats import pygama.math.histogram as pgh diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index c53adc5c9..905d126f0 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -23,9 +23,9 @@ from matplotlib.colors import LogNorm from scipy.optimize import curve_fit, minimize from scipy.stats import chisquare, norm +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import RBF, ConstantKernel -from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ignore_warnings import pygama.math.histogram as pgh @@ -924,7 +924,9 @@ def event_selection( if not isinstance(kev_widths, list): kev_widths = [kev_widths] - df = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "timestamp"])[0].view_as('pd') + df = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "timestamp"])[ + 0 + ].view_as("pd") if pulser_mask is None: pulser_props 
= cts.find_pulser_properties(df, energy="daqenergy") diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 5c7169a69..61e833994 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -28,6 +28,7 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() + def load_data( raw_file: list[str], lh5_path: str, @@ -36,7 +37,9 @@ def load_data( threshold: int = 5000, wf_field: str = "waveform", ) -> lgdo.Table: - df = sto.read(lh5_path, raw_file, field_mask=["daqenergy", "timestamp"])[0].view_as('pd') + df = sto.read(lh5_path, raw_file, field_mask=["daqenergy", "timestamp"])[0].view_as( + "pd" + ) if pulser_mask is None: pulser_props = cts.find_pulser_properties(df, energy="daqenergy") diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index c41c1d69f..74e520046 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -9,7 +9,7 @@ from lgdo import Table, lh5 log = logging.getLogger(__name__) -sto=lh5.LH5Store() +sto = lh5.LH5Store() def return_nans(input): @@ -141,9 +141,19 @@ def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold): mask = np.append(mask, file_mask) ids = np.where(mask)[0] else: - data = pd.DataFrame({"array_id":sto.read("hardware_tcm_1/array_id", tcm_file)[0].view_as('np'), - "array_idx":sto.read("hardware_tcm_1/array_idx", tcm_file)[0].view_as('np')}) - cumulength = sto.read("hardware_tcm_1/cumulative_length", tcm_file)[0].view_as('np') + data = pd.DataFrame( + { + "array_id": sto.read("hardware_tcm_1/array_id", tcm_file)[0].view_as( + "np" + ), + "array_idx": sto.read("hardware_tcm_1/array_idx", tcm_file)[0].view_as( + "np" + ), + } + ) + cumulength = sto.read("hardware_tcm_1/cumulative_length", tcm_file)[0].view_as( + "np" + ) cumulength = np.append(np.array([0]), cumulength) n_channels = np.diff(cumulength) evt_numbers = np.repeat(np.arange(0, len(cumulength) - 1), np.diff(cumulength)) From ec8dd3d437b402fc510e6bec3a6be17dc43e7597 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 2 Feb 2024 16:32:25 +0100 Subject: [PATCH 171/191] bugfix for load_data to eval all fields in hit dict --- src/pygama/pargen/utils.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 74e520046..b81acf66e 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -69,18 +69,21 @@ def load_data( masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read(lh5_path, tfiles)[0] + file_df = pd.DataFrame(columns=params) if tstamp in cal_dict: cal_dict_ts = cal_dict[tstamp] else: cal_dict_ts = cal_dict + + for outname, info in cal_dict_ts.items(): + outcol = table.eval( + info["expression"], info.get("parameters", None) + ) + table.add_column(outname, outcol) + for param in params: - if param in cal_dict_ts: - expression = cal_dict_ts[param]["expression"] - parameters = cal_dict_ts[param].get("parameters", None) - file_df[param] = table.eval(expression, parameters) - else: - file_df[param] = table[param] + file_df[param] = table[param] file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("run_timestamp") if threshold is not None: @@ -101,13 +104,13 @@ def load_data( table = sto.read(lh5_path, files)[0] df = pd.DataFrame(columns=params) + for outname, info in cal_dict.items(): + outcol = table.eval( + info["expression"], info.get("parameters", None) + ) + table.add_column(outname, outcol) for param in params: - if 
param in cal_dict: - expression = cal_dict[param]["expression"] - parameters = cal_dict[param].get("parameters", None) - df[param] = table.eval(expression, parameters) - else: - df[param] = table[param] + df[param] = table[param] if threshold is not None: masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) From 5f90e405f65f1eb2e19a3630675f88231ca9b6db Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 2 Feb 2024 16:32:53 +0100 Subject: [PATCH 172/191] specify dtype when init lh5 objects --- src/pygama/evt/aggregators.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index 3f45b5a6e..0ec3d0fbe 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -152,7 +152,7 @@ def evaluate_to_first_or_last( (t0 > outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch] ) - return Array(nda=out) + return Array(nda=out, dtype=type(defv)) def evaluate_to_scalar( @@ -271,7 +271,7 @@ def evaluate_to_scalar( res = res.astype(bool) out[evt_ids_ch] = out[evt_ids_ch] & res & limarr - return Array(nda=out) + return Array(nda=out, dtype=type(defv)) def evaluate_at_channel( @@ -356,7 +356,7 @@ def evaluate_at_channel( out[evt_ids_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[evt_ids_ch]) - return Array(nda=out) + return Array(nda=out, dtype=type(defv)) def evaluate_at_channel_vov( @@ -451,7 +451,7 @@ def evaluate_at_channel_vov( if ch == chns[0]: type_name = res.dtype - return VectorOfVectors(ak.values_astype(out, type_name)) + return VectorOfVectors(ak.values_astype(out, type_name), dtype=type_name) def evaluate_to_aoesa( @@ -684,5 +684,5 @@ def evaluate_to_vector( ) return VectorOfVectors( - ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)) + ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)),dtype=type(defv) ) From e6129af50bdf07dd396169cce2e4c2564b3d78b3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 2 Feb 2024 16:33:13 +0100 Subject: [PATCH 173/191] support subtables --- src/pygama/skm/build_skm.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index a92619b83..012fdf8f4 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -139,11 +139,11 @@ def build_skm( ): miss_val = eval(miss_val) - fw_fld = tbl_cfg["operations"][op]["forward_field"].split(".") + fw_fld = tbl_cfg["operations"][op]["forward_field"] # load object if from evt tier - if fw_fld[0] == evt_group: - obj = store.read(f"/{fw_fld[0]}/{fw_fld[1]}", f_dict[fw_fld[0]])[ + if evt_group in fw_fld.replace('.','/'): + obj = store.read(f"/{fw_fld.replace('.','/')}", f_dict[fw_fld.split(".",1)[0]])[ 0 ].view_as("ak") @@ -153,9 +153,9 @@ def build_skm( raise ValueError( f"{op} is an sub evt level operation. 
tcm_idx field must be specified" ) - tcm_idx_fld = tbl_cfg["operations"][op]["tcm_idx"].split(".") + tcm_idx_fld = tbl_cfg["operations"][op]["tcm_idx"] tcm_idx = store.read( - f"/{tcm_idx_fld[0]}/{tcm_idx_fld[1]}", f_dict[tcm_idx_fld[0]] + f"/{tcm_idx_fld.replace('.','/')}", f_dict[tcm_idx_fld.split(".")[0]] )[0].view_as("ak")[:, :multi] obj = ak.Array([[] for x in range(len(tcm_idx))]) @@ -182,16 +182,16 @@ def build_skm( ch_idx = idx[ids == ch] ct_idx = ak.count(ch_idx, axis=-1) fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) - + if ( - f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld[0]}/{fw_fld[1]}" - not in lh5.ls(f_dict[fw_fld[0]], f"ch{ch}/{fw_fld[0]}/") + f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld.replace('.','/')}" + not in lh5.ls(f_dict[[key for key in f_dict if key in fw_fld][0]], f"ch{ch}/{fw_fld.rsplit('.',1)[0]}/") ): och = Array(nda=np.full(len(fl_idx), miss_val)) else: och, _ = store.read( - f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld[0]}/{fw_fld[1]}", - f_dict[fw_fld[0]], + f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld.replace('.','/')}", + f_dict[[key for key in f_dict if key in fw_fld][0]], idx=fl_idx, ) if not isinstance(och, Array): From 96a07cc8bdb29c3034147664dd63c6a53fc47131 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 15:37:41 +0000 Subject: [PATCH 174/191] style: pre-commit fixes --- src/pygama/evt/aggregators.py | 3 ++- src/pygama/pargen/utils.py | 10 +++------- src/pygama/skm/build_skm.py | 18 +++++++++++------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py index 0ec3d0fbe..dbcae2829 100644 --- a/src/pygama/evt/aggregators.py +++ b/src/pygama/evt/aggregators.py @@ -684,5 +684,6 @@ def evaluate_to_vector( ) return VectorOfVectors( - ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)),dtype=type(defv) + ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)), + dtype=type(defv), ) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index b81acf66e..c53d39906 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -69,7 +69,7 @@ def load_data( masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read(lh5_path, tfiles)[0] - + file_df = pd.DataFrame(columns=params) if tstamp in cal_dict: cal_dict_ts = cal_dict[tstamp] @@ -77,9 +77,7 @@ def load_data( cal_dict_ts = cal_dict for outname, info in cal_dict_ts.items(): - outcol = table.eval( - info["expression"], info.get("parameters", None) - ) + outcol = table.eval(info["expression"], info.get("parameters", None)) table.add_column(outname, outcol) for param in params: @@ -105,9 +103,7 @@ def load_data( table = sto.read(lh5_path, files)[0] df = pd.DataFrame(columns=params) for outname, info in cal_dict.items(): - outcol = table.eval( - info["expression"], info.get("parameters", None) - ) + outcol = table.eval(info["expression"], info.get("parameters", None)) table.add_column(outname, outcol) for param in params: df[param] = table[param] diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 012fdf8f4..7d79f8c40 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -142,10 +142,10 @@ def build_skm( fw_fld = tbl_cfg["operations"][op]["forward_field"] # load object if from evt tier - if evt_group in 
fw_fld.replace('.','/'): - obj = store.read(f"/{fw_fld.replace('.','/')}", f_dict[fw_fld.split(".",1)[0]])[ - 0 - ].view_as("ak") + if evt_group in fw_fld.replace(".", "/"): + obj = store.read( + f"/{fw_fld.replace('.','/')}", f_dict[fw_fld.split(".", 1)[0]] + )[0].view_as("ak") # else collect data from lower tier via tcm_idx else: @@ -155,7 +155,8 @@ def build_skm( ) tcm_idx_fld = tbl_cfg["operations"][op]["tcm_idx"] tcm_idx = store.read( - f"/{tcm_idx_fld.replace('.','/')}", f_dict[tcm_idx_fld.split(".")[0]] + f"/{tcm_idx_fld.replace('.','/')}", + f_dict[tcm_idx_fld.split(".")[0]], )[0].view_as("ak")[:, :multi] obj = ak.Array([[] for x in range(len(tcm_idx))]) @@ -182,10 +183,13 @@ def build_skm( ch_idx = idx[ids == ch] ct_idx = ak.count(ch_idx, axis=-1) fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False) - + if ( f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld.replace('.','/')}" - not in lh5.ls(f_dict[[key for key in f_dict if key in fw_fld][0]], f"ch{ch}/{fw_fld.rsplit('.',1)[0]}/") + not in lh5.ls( + f_dict[[key for key in f_dict if key in fw_fld][0]], + f"ch{ch}/{fw_fld.rsplit('.',1)[0]}/", + ) ): och = Array(nda=np.full(len(fl_idx), miss_val)) else: From 0b569e956f993569128da8523d2d1d00391a8cf4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 2 Feb 2024 16:42:15 +0100 Subject: [PATCH 175/191] change more prints to log --- src/pygama/math/histogram.py | 13 ++++++++----- src/pygama/math/peak_fitting.py | 3 +++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/pygama/math/histogram.py b/src/pygama/math/histogram.py index b62bd4da1..32774ac0d 100644 --- a/src/pygama/math/histogram.py +++ b/src/pygama/math/histogram.py @@ -17,9 +17,12 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib import rcParams +import logging import pygama.math.utils as pgu +log = logging.getLogger(__name__) + def get_hist(data, bins=None, range=None, dx=None, wts=None): """return hist, bins, var after binning data @@ -361,7 +364,7 @@ def get_fwfm(fraction, hist, bins, var=None, mx=None, dmx=0, bl=0, dbl=0, method # x_lo i_0 = bin_lo - int(np.floor(n_slope/2)) if i_0 < 0: - print(f"get_fwfm: fit slopes failed") + log.debug(f"get_fwfm: fit slopes failed") return 0, 0 i_n = i_0 + n_slope wts = None if var is None else 1/np.sqrt(var[i_0:i_n]) #fails for any var = 0 @@ -370,7 +373,7 @@ def get_fwfm(fraction, hist, bins, var=None, mx=None, dmx=0, bl=0, dbl=0, method try: (m, b), cov = np.polyfit(bin_centers[i_0:i_n], hist[i_0:i_n], 1, w=wts, cov='unscaled') except np.linalg.LinAlgError: - print(f"get_fwfm: LinAlgError") + log.debug(f"get_fwfm: LinAlgError") return 0, 0 x_lo = (val_f-b)/m #uncertainty @@ -380,7 +383,7 @@ def get_fwfm(fraction, hist, bins, var=None, mx=None, dmx=0, bl=0, dbl=0, method # x_hi i_0 = bin_hi - int(np.floor(n_slope/2)) + 1 if i_0 == len(hist): - print(f"get_fwfm: fit slopes failed") + log.debug(f"get_fwfm: fit slopes failed") return 0, 0 i_n = i_0 + n_slope @@ -389,11 +392,11 @@ def get_fwfm(fraction, hist, bins, var=None, mx=None, dmx=0, bl=0, dbl=0, method try: (m, b), cov = np.polyfit(bin_centers[i_0:i_n], hist[i_0:i_n], 1, w=wts, cov='unscaled') except np.linalg.LinAlgError: - print(f"get_fwfm: LinAlgError") + log.debug(f"get_fwfm: LinAlgError") return 0, 0 x_hi = (val_f-b)/m if x_hi < x_lo: - print(f"get_fwfm: fit slopes produced negative fwfm") + log.debug(f"get_fwfm: fit slopes produced negative fwfm") return 0, 0 #uncertainty diff --git a/src/pygama/math/peak_fitting.py b/src/pygama/math/peak_fitting.py index 
0a2ad98c4..958a12925 100644
--- a/src/pygama/math/peak_fitting.py
+++ b/src/pygama/math/peak_fitting.py
@@ -6,9 +6,12 @@
 from iminuit import Minuit, cost
 from scipy.optimize import brentq, minimize_scalar
 from scipy.stats import crystalball
+import logging
 
 import pygama.math.histogram as pgh
 
+log = logging.getLogger(__name__)
+
 limit = np.log(sys.float_info.max)/10
 kwd = {"parallel": False, "fastmath": True}
 

From 70d6ecd64ccbe377ac3b183f82eb67eea1ffd70a Mon Sep 17 00:00:00 2001
From: ggmarshall
Date: Sat, 3 Feb 2024 21:45:39 +0100
Subject: [PATCH 176/191] bugfix for load_data: put run_timestamp in later

---
 src/pygama/pargen/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py
index c53d39906..0ad605c9d 100644
--- a/src/pygama/pargen/utils.py
+++ b/src/pygama/pargen/utils.py
@@ -82,8 +82,9 @@ def load_data(
 
             for param in params:
                 file_df[param] = table[param]
+
             file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object)
-            params.append("run_timestamp")
+
             if threshold is not None:
                 mask = file_df[cal_energy_param] > threshold
                 file_df.drop(np.where(~mask)[0], inplace=True)
@@ -92,7 +93,8 @@ def load_data(
                 masks = np.append(masks, mask)
             df.append(file_df)
             all_files += tfiles
-
+
+        params.append("run_timestamp")
         df = pd.concat(df)
 
     elif isinstance(files, list):

From 4155a0cedd4c008f609eb7499a4f9b05d4cadcd7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 3 Feb 2024 20:46:42 +0000
Subject: [PATCH 177/191] style: pre-commit fixes

---
 src/pygama/pargen/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py
index 0ad605c9d..a1ec229ab 100644
--- a/src/pygama/pargen/utils.py
+++ b/src/pygama/pargen/utils.py
@@ -84,7 +84,7 @@ def load_data(
                 file_df[param] = table[param]
 
             file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object)
-
+
             if threshold is not None:
                 mask = file_df[cal_energy_param] > threshold
                 file_df.drop(np.where(~mask)[0], inplace=True)
@@ -93,7 +93,7 @@ def load_data(
                 masks = np.append(masks, mask)
             df.append(file_df)
             all_files += tfiles
-
+
         params.append("run_timestamp")
         df = pd.concat(df)
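The two patches above relocate `params.append("run_timestamp")` out of the per-timestamp loop: mutating `params` while iterating over timestamps meant that, from the second file batch onward, the `for param in params` copy tried to read a `run_timestamp` column from the LH5 table, where none exists. A minimal self-contained sketch of the corrected ordering (toy tables and a stand-in column name, not data from this series):

    import numpy as np
    import pandas as pd

    # toy stand-ins for the LH5 tables read per run timestamp
    tables = {"t0": {"cuspEmax": [1.0, 2.0]}, "t1": {"cuspEmax": [3.0, 4.0]}}
    params = ["cuspEmax"]
    frames = []
    for tstamp, table in tables.items():
        # only columns that really exist in the table are copied here
        file_df = pd.DataFrame({param: table[param] for param in params})
        # run_timestamp is filled per file, never looked up in the table
        file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object)
        frames.append(file_df)
    params.append("run_timestamp")  # appended once, after all tables are read
    df = pd.concat(frames)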

From 2c9f411666549273b773b5ffd1e1d5cd84c33796 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 9 Feb 2024 15:18:52 +0100
Subject: [PATCH 178/191] Allow build_evt and build_skm to just return the
 output Table

---
 src/pygama/evt/build_evt.py | 24 ++++++++++++++++--------
 src/pygama/skm/build_skm.py | 15 ++++++++++-----
 tests/evt/test_build_evt.py | 32 ++++++++++++++++----------------
 tests/skm/test_build_skm.py |  6 +++---
 4 files changed, 45 insertions(+), 32 deletions(-)

diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py
index 61296f9ee..09f34220e 100644
--- a/src/pygama/evt/build_evt.py
+++ b/src/pygama/evt/build_evt.py
@@ -24,15 +24,16 @@ def build_evt(
     f_tcm: str,
     f_dsp: str,
     f_hit: str,
-    f_evt: str,
     evt_config: str | dict,
+    *,
+    f_evt: str | None = None,
     wo_mode: str = "write_safe",
     evt_group: str = "evt",
     tcm_group: str = "hardware_tcm_1",
     dsp_group: str = "dsp",
     hit_group: str = "hit",
     tcm_id_table_pattern: str = "ch{}",
-) -> None:
+) -> None | Table:
     """Transform data from the `hit` and `dsp` levels, which is channel
     sorted, to an event sorted data format.
 
@@ -44,8 +45,6 @@ def build_evt(
         input LH5 file of the `dsp` level.
     f_hit
         input LH5 file of the `hit` level.
-    f_evt
-        name of the output file.
     evt_config
         name of configuration file or dictionary defining event fields. Channel
         lists can be defined by importing a metadata module.
@@ -108,6 +107,9 @@ def build_evt(
            }
         }
 
+    f_evt
+        name of the output file. If ``None``, return the output :class:`.Table`
+        instead of writing to disk.
     wo_mode
         writing mode.
     evt group
@@ -280,19 +282,25 @@ def build_evt(
     if "outputs" in tbl_cfg.keys():
         if len(tbl_cfg["outputs"]) < 1:
             log.warning("No output fields specified, no file will be written.")
+            return table
         else:
             clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]]
             for fld in clms_to_remove:
                 table.remove_field(fld, True)
-            store.write(
-                obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode
-            )
+
+            if f_evt:
+                store.write(
+                    obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode
+                )
+            else:
+                return table
     else:
         log.warning("No output fields specified, no file will be written.")
 
     key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0)
     log.info(
-        f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved {len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups"
+        f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved "
+        f"{len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups"
     )
 
diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py
index 7d79f8c40..47f852565 100644
--- a/src/pygama/skm/build_skm.py
+++ b/src/pygama/skm/build_skm.py
@@ -24,16 +24,17 @@ def build_skm(
     f_hit: str,
     f_dsp: str,
     f_tcm: str,
-    f_skm: str,
     skm_conf: dict | str,
-    wo_mode="w",
+    *,
+    f_skm: str | None = None,
+    wo_mode: str = "w",
     skm_group: str = "skm",
     evt_group: str = "evt",
     tcm_group: str = "hardware_tcm_1",
     dsp_group: str = "dsp",
     hit_group: str = "hit",
     tcm_id_table_pattern: str = "ch{}",
-) -> None:
+) -> None | Table:
     """Builds a skimmed file from a (set) of `evt/hit/dsp` tier file(s).
 
     Parameters
@@ -46,8 +47,6 @@ def build_skm(
         path of `dsp` file.
     f_tcm
         path of `tcm` file.
-    f_skm
-        name of the `skm` output file.
     skm_conf
         name of configuration file or dictionary defining `skm` fields.
 
@@ -87,6 +86,9 @@ def build_skm(
            }
         }
     }
+    f_skm
+        name of the `skm` output file. If ``None``, return the output
+        :class:`.Table` instead of writing to disk.
     wo_mode
         writing mode.
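With `f_evt`/`f_skm` now optional keyword arguments, both builders can run fully in memory. A minimal usage sketch (the tier-file paths and config name below are placeholders, not files from this series; the in-memory behaviour mirrors the updated tests):

    import lgdo
    from pygama.evt import build_evt

    # placeholder inputs: any valid tcm/dsp/hit tier files and evt config,
    # e.g. from legend-testdata as in the test suite
    f_tcm, f_dsp, f_hit = "tier_tcm.lh5", "tier_dsp.lh5", "tier_hit.lh5"
    cfg = "evt-config.json"

    tbl = build_evt(f_tcm, f_dsp, f_hit, cfg)  # f_evt=None: Table comes back
    assert isinstance(tbl, lgdo.Table)

    # on disk, as before, with the output file passed by keyword
    build_evt(f_tcm, f_dsp, f_hit, cfg, f_evt="out_evt.lh5", wo_mode="o")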
@@ -229,6 +231,9 @@ def build_skm( obj.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"] table.add_field(op, obj, True) + if not f_skm: + return table + # last thing missing is writing it out if wo_mode not in ["w", "write_safe", "o", "overwrite", "a", "append"]: raise ValueError(f"wo_mode {wo_mode} not valid.") diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index 0f193074c..aa177df63 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -22,8 +22,8 @@ def test_basics(lgnd_test_data, tmptestdir): f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, evt_config=f"{config_dir}/basic-evt-config.json", + f_evt=outfile, wo_mode="o", evt_group="evt", hit_group="hit", @@ -74,8 +74,8 @@ def test_lar_module(lgnd_test_data, tmptestdir): f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, evt_config=f"{config_dir}/module-test-evt-config.json", + f_evt=outfile, wo_mode="o", evt_group="evt", hit_group="hit", @@ -103,8 +103,8 @@ def test_lar_t0_vov_module(lgnd_test_data, tmptestdir): f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json", + f_evt=outfile, wo_mode="o", evt_group="evt", hit_group="hit", @@ -136,8 +136,8 @@ def test_vov(lgnd_test_data, tmptestdir): f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, evt_config=f"{config_dir}/vov-test-evt-config.json", + f_evt=outfile, wo_mode="o", evt_group="evt", hit_group="hit", @@ -187,21 +187,21 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): f_config = f"{config_dir}/basic-evt-config.json" with pytest.raises(KeyError): - build_evt(f_dsp, f_tcm, f_hit, outfile, f_config) + build_evt(f_dsp, f_tcm, f_hit, f_config, outfile) with pytest.raises(KeyError): - build_evt(f_tcm, f_hit, f_dsp, outfile, f_config) + build_evt(f_tcm, f_hit, f_dsp, f_config, outfile) with pytest.raises(TypeError): - build_evt(f_tcm, f_dsp, f_hit, outfile, None) + build_evt(f_tcm, f_dsp, f_hit, None, outfile) conf = {"operations": {}} with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf) + build_evt(f_tcm, f_dsp, f_hit, conf, outfile) conf = {"channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}} with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf) + build_evt(f_tcm, f_dsp, f_hit, conf, outfile) conf = { "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}, @@ -217,7 +217,7 @@ def test_graceful_crashing(lgnd_test_data, tmptestdir): }, } with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, conf) + build_evt(f_tcm, f_dsp, f_hit, conf, outfile) def test_query(lgnd_test_data, tmptestdir): @@ -229,8 +229,8 @@ def test_query(lgnd_test_data, tmptestdir): f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, evt_config=f"{config_dir}/query-test-evt-config.json", + f_evt=outfile, wo_mode="o", evt_group="evt", hit_group="hit", @@ -277,7 
+277,7 @@ def test_vector_sort(lgnd_test_data, tmptestdir): }, }, } - build_evt(f_tcm, f_dsp, f_hit, outfile, conf) + build_evt(f_tcm, f_dsp, f_hit, conf, outfile) assert os.path.exists(outfile) assert len(lh5.ls(outfile, "/evt/")) == 4 @@ -300,14 +300,14 @@ def test_tcm_id_table_pattern(lgnd_test_data, tmptestdir): f_config = f"{config_dir}/basic-evt-config.json" with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{{}}") + build_evt(f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="ch{{}}") with pytest.raises(ValueError): - build_evt(f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{}{}") + build_evt(f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="ch{}{}") with pytest.raises(NotImplementedError): build_evt( - f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="ch{tcm_id}" + f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="ch{tcm_id}" ) with pytest.raises(ValueError): build_evt( - f_tcm, f_dsp, f_hit, outfile, f_config, tcm_id_table_pattern="apple{}banana" + f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="apple{}banana" ) diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index b23137ec6..ea1c5e37d 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -22,8 +22,8 @@ def test_basics(lgnd_test_data, tmptestdir): f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), - f_evt=outfile, evt_config=f"{evt_config_dir}/vov-test-evt-config.json", + f_evt=outfile, wo_mode="o", evt_group="evt", hit_group="hit", @@ -38,8 +38,8 @@ def test_basics(lgnd_test_data, tmptestdir): lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), lgnd_test_data.get_path(tcm_path), - skm_out, skm_conf, + skm_out, wo_mode="o", ) @@ -103,8 +103,8 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), lgnd_test_data.get_path(tcm_path), - skm_out, skm_conf, + skm_out, wo_mode="o", ) From e94c956474d7e8ffca5ab689c855b0feb8ab40c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 9 Feb 2024 15:57:00 +0100 Subject: [PATCH 179/191] Fix tests --- src/pygama/evt/build_evt.py | 1 - src/pygama/skm/build_skm.py | 2 +- tests/evt/test_build_evt.py | 2 ++ tests/skm/configs/basic-skm-config.json | 2 +- tests/skm/test_build_skm.py | 29 +++++++++++++++++++------ 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py index 09f34220e..5f7949bdb 100644 --- a/src/pygama/evt/build_evt.py +++ b/src/pygama/evt/build_evt.py @@ -25,7 +25,6 @@ def build_evt( f_dsp: str, f_hit: str, evt_config: str | dict, - *, f_evt: str | None = None, wo_mode: str = "write_safe", evt_group: str = "evt", diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py index 47f852565..83c601c3a 100644 --- a/src/pygama/skm/build_skm.py +++ b/src/pygama/skm/build_skm.py @@ -25,7 +25,6 @@ def build_skm( f_dsp: str, f_tcm: str, skm_conf: dict | str, - *, f_skm: str | None = None, wo_mode: str = "w", skm_group: str = "skm", @@ -237,6 +236,7 @@ def build_skm( # last thing missing is writing it out if wo_mode not in ["w", "write_safe", "o", "overwrite", "a", "append"]: raise ValueError(f"wo_mode {wo_mode} not valid.") + log.debug("saving 
skm file") if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm): raise FileExistsError(f"Write_safe mode: {f_skm} exists.") diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py index aa177df63..80a40d9a8 100644 --- a/tests/evt/test_build_evt.py +++ b/tests/evt/test_build_evt.py @@ -18,6 +18,7 @@ def test_basics(lgnd_test_data, tmptestdir): tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" if os.path.exists(outfile): os.remove(outfile) + build_evt( f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), @@ -30,6 +31,7 @@ def test_basics(lgnd_test_data, tmptestdir): dsp_group="dsp", tcm_group="hardware_tcm_1", ) + assert "statement" in store.read("/evt/multiplicity", outfile)[0].getattrs().keys() assert ( store.read("/evt/multiplicity", outfile)[0].getattrs()["statement"] diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json index 8037b21bf..e1ffda941 100644 --- a/tests/skm/configs/basic-skm-config.json +++ b/tests/skm/configs/basic-skm-config.json @@ -13,7 +13,7 @@ }, "energy": { "forward_field": "hit.cuspEmax_ctc_cal", - "missing_value": "np.nan", + "missing_value": 0.0, "tcm_idx": "evt.energy_idx" }, "energy_id": { diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py index ea1c5e37d..c60c460f0 100644 --- a/tests/skm/test_build_skm.py +++ b/tests/skm/test_build_skm.py @@ -2,7 +2,7 @@ from pathlib import Path import awkward as ak -import numpy as np +import lgdo from lgdo.lh5 import LH5Store from pygama.evt import build_evt @@ -18,6 +18,7 @@ def test_basics(lgnd_test_data, tmptestdir): tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" if os.path.exists(outfile): os.remove(outfile) + build_evt( f_tcm=lgnd_test_data.get_path(tcm_path), f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), @@ -33,6 +34,17 @@ def test_basics(lgnd_test_data, tmptestdir): skm_conf = f"{config_dir}/basic-skm-config.json" skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5" + + result = build_skm( + outfile, + lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), + lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")), + lgnd_test_data.get_path(tcm_path), + skm_conf, + ) + + assert isinstance(result, lgdo.Table) + build_skm( outfile, lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")), @@ -44,7 +56,11 @@ def test_basics(lgnd_test_data, tmptestdir): ) assert os.path.exists(skm_out) - df = store.read("/skm/", skm_out)[0].view_as("pd") + obj, _ = store.read("/skm/", skm_out) + + assert obj == result + + df = obj.view_as("pd") assert "timestamp" in df.keys() assert "energy_0" in df.keys() assert "energy_1" in df.keys() @@ -56,9 +72,7 @@ def test_basics(lgnd_test_data, tmptestdir): assert "energy_sum" in df.keys() assert (df.multiplicity.to_numpy() <= 3).all() assert ( - np.nan_to_num(df.energy_0.to_numpy()) - + np.nan_to_num(df.energy_1.to_numpy()) - + np.nan_to_num(df.energy_2.to_numpy()) + df.energy_0.to_numpy() + df.energy_1.to_numpy() + df.energy_2.to_numpy() == df.energy_sum.to_numpy() ).all() @@ -81,12 +95,13 @@ def test_attribute_passing(lgnd_test_data, tmptestdir): tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5" if os.path.exists(outfile): os.remove(outfile) + build_evt( f_tcm=lgnd_test_data.get_path(tcm_path), 
diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py
index ea1c5e37d..c60c460f0 100644
--- a/tests/skm/test_build_skm.py
+++ b/tests/skm/test_build_skm.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 
 import awkward as ak
-import numpy as np
+import lgdo
 from lgdo.lh5 import LH5Store
 
 from pygama.evt import build_evt
@@ -18,6 +18,7 @@ def test_basics(lgnd_test_data, tmptestdir):
     tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
     if os.path.exists(outfile):
         os.remove(outfile)
+
     build_evt(
         f_tcm=lgnd_test_data.get_path(tcm_path),
         f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
         f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
@@ -33,6 +34,17 @@ def test_basics(lgnd_test_data, tmptestdir):
 
     skm_conf = f"{config_dir}/basic-skm-config.json"
     skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5"
+
+    result = build_skm(
+        outfile,
+        lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+        lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+        lgnd_test_data.get_path(tcm_path),
+        skm_conf,
+    )
+
+    assert isinstance(result, lgdo.Table)
+
     build_skm(
         outfile,
         lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
@@ -44,7 +56,11 @@ def test_basics(lgnd_test_data, tmptestdir):
     )
 
     assert os.path.exists(skm_out)
-    df = store.read("/skm/", skm_out)[0].view_as("pd")
+    obj, _ = store.read("/skm/", skm_out)
+
+    assert obj == result
+
+    df = obj.view_as("pd")
     assert "timestamp" in df.keys()
     assert "energy_0" in df.keys()
     assert "energy_1" in df.keys()
@@ -56,9 +72,7 @@ def test_basics(lgnd_test_data, tmptestdir):
     assert "energy_sum" in df.keys()
     assert (df.multiplicity.to_numpy() <= 3).all()
     assert (
-        np.nan_to_num(df.energy_0.to_numpy())
-        + np.nan_to_num(df.energy_1.to_numpy())
-        + np.nan_to_num(df.energy_2.to_numpy())
+        df.energy_0.to_numpy() + df.energy_1.to_numpy() + df.energy_2.to_numpy()
         == df.energy_sum.to_numpy()
     ).all()
 
@@ -81,12 +95,13 @@ def test_attribute_passing(lgnd_test_data, tmptestdir):
     tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
     if os.path.exists(outfile):
         os.remove(outfile)
+
     build_evt(
         f_tcm=lgnd_test_data.get_path(tcm_path),
         f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
         f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
-        f_evt=outfile,
         evt_config=f"{evt_config_dir}/vov-test-evt-config.json",
+        f_evt=outfile,
         wo_mode="o",
         evt_group="evt",
         hit_group="hit",
@@ -104,7 +119,7 @@ def test_attribute_passing(lgnd_test_data, tmptestdir):
         lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
         lgnd_test_data.get_path(tcm_path),
         skm_conf,
-        skm_out,
+        f_skm=skm_out,
         wo_mode="o",
     )
 

From 74f857f6c7f8ab01683a6dd575c8e6e43dd8b131 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 9 Feb 2024 16:05:06 +0100
Subject: [PATCH 180/191] [docs] remove broken iminuit intersphinx_mapping

---
 docs/source/conf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5accbd4a7..3bcd7d1fc 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -58,7 +58,6 @@
     "scipy": ("https://docs.scipy.org/doc/scipy", None),
     "pandas": ("https://pandas.pydata.org/docs", None),
     "matplotlib": ("https://matplotlib.org/stable", None),
-    "iminuit": ("https://iminuit.readthedocs.io/en/stable", None),
     "h5py": ("https://docs.h5py.org/en/stable", None),
     "pint": ("https://pint.readthedocs.io/en/stable", None),
     "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None),

From 0432e50471cd8d03e2706442f4a619589d611e2a Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Fri, 9 Feb 2024 16:07:40 +0100
Subject: [PATCH 181/191] [ci] try increasing tqdm silence period

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 535cff14d..24b9bfbd8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -13,7 +13,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  TQDM_MININTERVAL: 10
+  TQDM_MININTERVAL: 100
 
 jobs:
 

From a6d5aacbe48ffbe6fd65b52eb3ffa0e2059de92e Mon Sep 17 00:00:00 2001
From: ggmarshall <72088559+ggmarshall@users.noreply.github.com>
Date: Mon, 19 Feb 2024 09:53:59 +0000
Subject: [PATCH 182/191] Bug fix: `evt.modules.spm.cast_trigger` where
 Awkward arrays not replacing `nan`s (#559)

* fixed bug in cast_trigger where awkward arrays were not replacing nans

---
 src/pygama/evt/modules/spm.py | 8 +++++---
 src/pygama/flow/utils.py      | 7 +------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py
index 2dc5a4290..6e7140d17 100644
--- a/src/pygama/evt/modules/spm.py
+++ b/src/pygama/evt/modules/spm.py
@@ -37,11 +37,13 @@ def cast_trigger(
             ak.min(ak.fill_none(trgr.view_as("ak"), tdefault), axis=-1), tdefault
         )
 
-    elif isinstance(trgr, ak.Array):
+    elif isinstance(trgr, (ak.Array, ak.highlevel.Array)):
         if trgr.ndim == 1:
-            return ak.fill_none(trgr, tdefault)
+            return ak.fill_none(ak.nan_to_none(trgr), tdefault)
         elif trgr.ndim == 2:
-            return ak.fill_none(ak.min(ak.fill_none(trgr, tdefault), axis=-1), tdefault)
+            return ak.fill_none(
+                ak.min(ak.fill_none(ak.nan_to_none(trgr), tdefault), axis=-1), tdefault
+            )
         else:
             raise ValueError(f"Too many dimensions: {trgr.ndim}")
     elif isinstance(trgr, (float, int)) and isinstance(length, int):
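The core of the fix just above: `ak.fill_none` only replaces missing entries (`None`), not floating-point `nan`, so `nan` trigger times sailed through untouched; `ak.nan_to_none` converts them first. A self-contained sketch (values are illustrative):

    import awkward as ak

    trgr = ak.Array([1.0, float("nan"), 3.0])
    ak.fill_none(trgr, 999.0)                  # nan survives: [1, nan, 3]
    ak.fill_none(ak.nan_to_none(trgr), 999.0)  # nan replaced: [1, 999, 3]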
f"self.aoesa_to_vov is False but {col} is a jagged array" - ) + col_dict[col][tcm_idx] = tier_table[col].nda elif isinstance(tier_table[col], VectorOfVectors): # Allocate memory for column for all channels if col not in col_dict.keys(): From e5f02b0bd328f542873ea6f55123c91e0b92c0d5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 19 Feb 2024 15:28:45 +0100 Subject: [PATCH 183/191] Bump lgdo version and ignore Pandas/Pyarrow deprecation warning --- pyproject.toml | 2 +- setup.cfg | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4d08123b8..1dcd76af8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ write_to = "src/pygama/_version.py" minversion = "6.0" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true -filterwarnings = ["error"] +filterwarnings = ["error", 'ignore:\nPyarrow:DeprecationWarning'] log_cli_level = "info" testpaths = "tests" diff --git a/setup.cfg b/setup.cfg index 74c036924..9d3080bbc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,13 +36,12 @@ install_requires = h5py>=3.2 iminuit legend-daq2lh5>=1.2.0a1 - legend-pydataobj>=1.5.0a2 + legend-pydataobj>=1.5.0a5 matplotlib numba!=0.53.*,!=0.54.*,!=0.57 numpy>=1.21 pandas>=1.4.4 pint - pyarrow scikit-learn scipy>=1.0.1 tables From 69d89b0837f9fa32ee49824b18585f59b97dab03 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 20 Feb 2024 22:04:01 +0100 Subject: [PATCH 184/191] Add CITATION.cff (#556) * [docs] add draft CITATION.cff --------- Co-authored-by: Jason Detwiler --- CITATION.cff | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..9782d1ac8 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,58 @@ +cff-version: 1.2.0 +title: pygama +doi: FILLME +date-released: 2024-02-01 +url: https://github.com/github-linguist/linguist +message: "If you use this software, please cite it as below." 
+authors:
+  - family-names: Agostini
+    given-names: Matteo
+    orcid: https://orcid.org/0000-0003-1151-5301
+  - family-names: Detwiler
+    given-names: Jason
+    orcid: https://orcid.org/0000-0002-9050-4610
+  - family-names: Pertoldi
+    given-names: Luigi
+    orcid: https://orcid.org/0000-0002-0467-2571
+  - family-names: Guinn
+    given-names: Ian
+    orcid: https://orcid.org/0000-0002-2424-3272
+  - family-names: Marshall
+    given-names: George
+    orcid: https://orcid.org/0000-0002-5470-5132
+  - family-names: D'Andrea
+    given-names: Valerio
+    orcid: https://orcid.org/0000-0003-2037-4133
+  - family-names: Krause
+    given-names: Patrick
+    orcid: https://orcid.org/0000-0002-9603-7865
+  - family-names: Song
+    given-names: Grace
+    email: grsong@uw.edu
+  - family-names: Engelhardt
+    given-names: Erin
+    email: erin717@live.unc.edu
+  - family-names: Borden
+    given-names: Sam
+    orcid: https://orcid.org/0009-0003-2539-4333
+  - family-names: Deckert
+    given-names: Rosanna
+    orcid: https://orcid.org/0009-0006-0431-341X
+  - family-names: Sweigart
+    given-names: David
+    email: dsweigar@uw.edu
+  - family-names: Zschocke
+    given-names: Andreas
+    email: Andreas.Zschocke@uni-tuebingen.de
+  - family-names: Wiseman
+    given-names: Clint
+    orcid: https://orcid.org/0000-0002-4232-1326
+  - family-names: Mathew
+    given-names: Tim
+    email: tmathew@uoregon.edu
+  - family-names: Kermaïdic
+    given-names: Yoann
+    orcid: https://orcid.org/0000-0001-8007-8016
+  - family-names: Shanks
+    given-names: Ben
+    email: benjamin.shanks@gmail.com

From a0085207d2a6965841810292b56ca9137eba4dc2 Mon Sep 17 00:00:00 2001
From: Luigi Pertoldi
Date: Wed, 28 Feb 2024 11:20:38 +0100
Subject: [PATCH 185/191] Update citation-related docs

---
 CITATION.cff          |  4 ++--
 README.md             | 14 +++++++++++---
 docs/source/index.rst |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index 9782d1ac8..90524f881 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,7 +1,7 @@
 cff-version: 1.2.0
 title: pygama
-doi: FILLME
-date-released: 2024-02-01
+doi: https://doi.org/10.5281/zenodo.10614246
+date-released: 2024-02-03
 url: https://github.com/github-linguist/linguist
 message: "If you use this software, please cite it as below."
 authors:
diff --git a/README.md b/README.md
index 54fecb59d..8abf4f7e5 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@
 [![GitHub pull requests](https://img.shields.io/github/issues-pr/legend-exp/pygama?logo=github)](https://github.com/legend-exp/pygama/pulls)
 [![License](https://img.shields.io/github/license/legend-exp/pygama)](https://github.com/legend-exp/pygama/blob/main/LICENSE)
 [![Read the Docs](https://img.shields.io/readthedocs/pygama?logo=readthedocs)](https://pygama.readthedocs.io)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10614246.svg)](https://zenodo.org/doi/10.5281/zenodo.10614246)
 
 *pygama* is a Python package for:
 
@@ -28,7 +29,14 @@
 
 Check out the [online documentation](https://pygama.readthedocs.io).
 
+If you are using this software, consider
+[citing it](https://zenodo.org/doi/10.5281/zenodo.10614246)!
+ ## Related repositories -- [legend-exp/legend-pydataobj](https://github.com/legend-exp/legend-pydataobj) → LEGEND Python Data Objects -- [legend-exp/legend-daq2lh5](https://github.com/legend-exp/legend-daq2lh5) → Convert digitizer data to LEGEND HDF5 -- [legend-exp/dspeed](https://github.com/legend-exp/dspeed) → Fast Digital Signal Processing for particle detector signals in Python + +- [legend-exp/legend-pydataobj](https://github.com/legend-exp/legend-pydataobj) + → LEGEND Python Data Objects +- [legend-exp/legend-daq2lh5](https://github.com/legend-exp/legend-daq2lh5) + → Convert digitizer data to LEGEND HDF5 +- [legend-exp/dspeed](https://github.com/legend-exp/dspeed) + → Fast Digital Signal Processing for particle detector signals in Python diff --git a/docs/source/index.rst b/docs/source/index.rst index e3972f81f..cf78de997 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -65,5 +65,6 @@ Next steps Source Code License + Citation Changelog developer From db324ffb8e5181d8f67dc39d2231e596ff2b7990 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 28 Feb 2024 11:22:08 +0100 Subject: [PATCH 186/191] [ci] update workflow files --- .github/workflows/distribution.yml | 49 ++++++++++++++++-------------- .github/workflows/main.yml | 7 +++-- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/.github/workflows/distribution.yml b/.github/workflows/distribution.yml index 63e9c51e7..61aa724a4 100644 --- a/.github/workflows/distribution.yml +++ b/.github/workflows/distribution.yml @@ -2,42 +2,47 @@ name: distribute on: workflow_dispatch: + pull_request: push: - tags: "v*" + branches: + - main release: types: - published -jobs: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 +jobs: dist: + name: Distribution build runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Build SDist and wheel - run: pipx run build - - - uses: actions/upload-artifact@v4 - with: - path: dist/* + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - - name: Check metadata - run: pipx run twine check dist/* + - uses: hynek/build-and-inspect-python-package@v2 publish: needs: [dist] + name: Publish to PyPI + environment: pypi + permissions: + id-token: write runs-on: ubuntu-latest if: github.event_name == 'release' && github.event.action == 'published' steps: - - uses: actions/download-artifact@v4 - with: - name: artifact - path: dist - - - uses: pypa/gh-action-pypi-publish@v1.8.11 - with: - password: ${{ secrets.pypi_password }} + - uses: actions/download-artifact@v4 + with: + name: Packages + path: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + if: github.event_name == 'release' && github.event.action == 'published' diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 24b9bfbd8..76eb5c78b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,18 +1,19 @@ name: pygama on: + workflow_dispatch: + pull_request: push: branches: - main - - 'releases/**' - pull_request: - release: + - "releases/**" concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: + FORCE_COLOR: 3 TQDM_MININTERVAL: 100 jobs: From 3dccd974764e976e0c1decbdfa2db853e135c431 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 28 Feb 2024 11:26:37 +0100 Subject: [PATCH 187/191] Update subpackage descriptions to silence wheel checking failure in CI --- src/pygama/math/__init__.py | 2 +- src/pygama/pargen/__init__.py | 
3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pygama/math/__init__.py b/src/pygama/math/__init__.py index cca7a6038..93b88f477 100644 --- a/src/pygama/math/__init__.py +++ b/src/pygama/math/__init__.py @@ -1,3 +1,3 @@ """ -Subpackage description +Statistical and mathematical utilities. """ diff --git a/src/pygama/pargen/__init__.py b/src/pygama/pargen/__init__.py index cca7a6038..3dcede5b7 100644 --- a/src/pygama/pargen/__init__.py +++ b/src/pygama/pargen/__init__.py @@ -1,3 +1,4 @@ """ -Subpackage description +Utilities to generate and optimize parameters of interest from data (e.g. +calibration routines) """ From d7d3c8183faf9294440173dd010703e1f51cde82 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 28 Feb 2024 11:44:00 +0100 Subject: [PATCH 188/191] [setup] transition to pure pyproject.toml --- .pre-commit-config.yaml | 1 + pyproject.toml | 113 ++++++++++++++++++++++++++++++++++++++-- setup.cfg | 86 ------------------------------ setup.py | 4 -- 4 files changed, 111 insertions(+), 93 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0bc4718d..02ffb3e94 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,6 +70,7 @@ repos: flake8-print, pep8-naming ] + args: ["--extend-ignore", "E203,E501"] - repo: https://github.com/kynan/nbstripout rev: "0.6.1" diff --git a/pyproject.toml b/pyproject.toml index 1dcd76af8..8f759a09d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,121 @@ [build-system] requires = [ - "setuptools>=42.0.0", - "setuptools_scm[toml]>=3.4", + "setuptools>=61.2", + "setuptools_scm[toml]>=7", ] - build-backend = "setuptools.build_meta" +[project] +name = "pygama" +description = "Python package for data processing and analysis" +authors = [ + { name = "The LEGEND collaboration" }, +] +maintainers = [ + { name = "The LEGEND collaboration" }, +] +readme = "README.md" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: MacOS", + "Operating System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", +] +requires-python = ">=3.9" +dependencies = [ + "colorlog", + "dspeed>=1.3.0a4", + "h5py>=3.2", + "iminuit", + "legend-daq2lh5>=1.2.0a1", + "legend-pydataobj>=1.5.0a5", + "matplotlib", + "numba!=0.53.*,!=0.54.*,!=0.57", + "numpy>=1.21", + "pandas>=1.4.4", + "pint", + "scikit-learn", + "scipy>=1.0.1", + "tables", + "tqdm>=4.27", +] +dynamic = [ + "version", +] + +[project.urls] +Homepage = "https://github.com/legend-exp/pygama" +"Bug Tracker" = "https://github.com/legend-exp/pygama/issues" +Discussions = "https://github.com/legend-exp/pygama/discussions" +Changelog = "https://github.com/legend-exp/pygama/releases" + [tool.setuptools_scm] write_to = "src/pygama/_version.py" +[project.optional-dependencies] +all = [ + "pygama[docs,test]", +] +docs = [ + "furo", + "jupyter", + "myst-parser", + "nbsphinx", + "sphinx", + "sphinx-copybutton", + "sphinx-inline-tabs", +] +test = [ + "pre-commit", + "pylegendtestdata", + "pytest>=6.0", + "pytest-cov", +] + +[project.scripts] +pygama = "pygama.cli:pygama_cli" + +[tool.setuptools] +include-package-data = true +zip-safe = false +license-files = [ + "LICENSE", +] + 
+[tool.setuptools.package-dir] +"" = "src" + +[tool.setuptools.packages.find] +where = [ + "src", +] +namespaces = false + +[tool.setuptools.package-data] +"*" = [ + "*.json", +] + +[tool.coverage] +run.source = ["pygama"] +report.exclude_also = [ + '\.\.\.', + 'if typing.TYPE_CHECKING:', +] + +[tool.flake8] +extend-ignore = "E203, E501" + +[tool.codespell] +ignore-words-list = "hist, gaus, nd, ans, crate, nin, puls, spms, fom" + [tool.pytest.ini_options] minversion = "6.0" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 9d3080bbc..000000000 --- a/setup.cfg +++ /dev/null @@ -1,86 +0,0 @@ -[metadata] -name = pygama -description = Python package for decoding and processing digitizer data -long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/legend-exp/pygama -author = The LEGEND collaboration -maintainer = The LEGEND collaboration -license = GPL-3.0 -license_files = LICENSE -classifiers = - Development Status :: 4 - Beta - Intended Audience :: Developers - Intended Audience :: Information Technology - Intended Audience :: Science/Research - License :: OSI Approved :: GNU General Public License v3 (GPLv3) - Operating System :: MacOS - Operating System :: POSIX - Operating System :: Unix - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3 :: Only - Topic :: Scientific/Engineering - Topic :: Scientific/Engineering :: Information Analysis - Topic :: Scientific/Engineering :: Mathematics - Topic :: Scientific/Engineering :: Physics - Topic :: Software Development -project_urls = - Documentation = https://pygama.readthedocs.io - -[options] -packages = find: -install_requires = - colorlog - dspeed>=1.3.0a4 - h5py>=3.2 - iminuit - legend-daq2lh5>=1.2.0a1 - legend-pydataobj>=1.5.0a5 - matplotlib - numba!=0.53.*,!=0.54.*,!=0.57 - numpy>=1.21 - pandas>=1.4.4 - pint - scikit-learn - scipy>=1.0.1 - tables - tqdm>=4.27 -python_requires = >=3.9 -include_package_data = True -package_dir = - = src -zip_safe = False - -[options.packages.find] -where = src - -[options.entry_points] -console_scripts = - pygama = pygama.cli:pygama_cli - -[options.extras_require] -all = - pygama[docs,test] -docs = - furo - jupyter - myst-parser - nbsphinx - sphinx - sphinx-copybutton - sphinx-inline-tabs -test = - pre-commit - pylegendtestdata - pytest>=6.0 - pytest-cov - -[options.package_data] -* = *.json - -[flake8] -extend-ignore = E203, E501 - -[codespell] -ignore-words-list = hist, gaus, nd, ans, crate, nin, puls, spms, fom diff --git a/setup.py b/setup.py deleted file mode 100644 index f6844919c..000000000 --- a/setup.py +++ /dev/null @@ -1,4 +0,0 @@ -import setuptools_scm # noqa: F401 -from setuptools import setup - -setup() From ae3752ab079a944217c2dc203ad7cbd5b5bea0c5 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 1 Mar 2024 11:43:53 +0100 Subject: [PATCH 189/191] [setup] bump LEGEND package versions --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8f759a09d..0b196a424 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,11 +31,11 @@ classifiers = [ requires-python = ">=3.9" dependencies = [ "colorlog", - "dspeed>=1.3.0a4", + "dspeed>=1.3", "h5py>=3.2", "iminuit", - "legend-daq2lh5>=1.2.0a1", - "legend-pydataobj>=1.5.0a5", + "legend-daq2lh5>=1.2", + "legend-pydataobj>=1.5", "matplotlib", "numba!=0.53.*,!=0.54.*,!=0.57", 
"numpy>=1.21", From ca7e5b5b576c4f3cc8a6b9885fa91a8804eff8e2 Mon Sep 17 00:00:00 2001 From: SamuelBorden Date: Wed, 6 Mar 2024 22:45:38 -0800 Subject: [PATCH 190/191] restored pargen --- src/pygama/pargen/AoE_cal.py | 113 +++++++++++------------ src/pygama/pargen/cuts.py | 4 +- src/pygama/pargen/data_cleaning.py | 25 ++--- src/pygama/pargen/ecal_th.py | 53 ++++++----- src/pygama/pargen/energy_cal.py | 108 ++++++++++------------ src/pygama/pargen/energy_optimisation.py | 87 +++++++++-------- src/pygama/pargen/extract_tau.py | 6 +- src/pygama/pargen/mse_psd.py | 16 ++-- 8 files changed, 194 insertions(+), 218 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 6b53e5490..227aec4e2 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -27,7 +27,6 @@ from matplotlib.colors import LogNorm from scipy.stats import chi2 -import pygama.math.distributions as pgd import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf from pygama.math.peak_fitting import nb_erfc @@ -68,10 +67,10 @@ def pdf( PDF for A/E consists of a gaussian signal with gaussian tail background """ try: - sig = n_sig * pgd.gaussian.get_pdf(x, mu, sigma) - x_lo = np.nanmin(x) if lower_range == np.inf else lower_range - x_hi = np.nanmax(x) if upper_range == np.inf else upper_range - bkg = n_bkg * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_bkg) + sig = n_sig * pgf.gauss_norm(x, mu, sigma) + bkg = n_bkg * pgf.gauss_tail_norm( + x, mu, sigma, tau_bkg, lower_range, upper_range + ) except: sig = np.full_like(x, np.nan) bkg = np.full_like(x, np.nan) @@ -128,7 +127,7 @@ def guess(hist, bins, var, **kwargs): try: _, sigma, _ = pgh.get_gaussian_guess(hist, bins) except: - pars, cov = pgbf.gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( hist, bins, var, mode_guess=mu, n_bins=20 ) _, sigma, _ = pars @@ -215,13 +214,14 @@ def pdf( PDF for A/E consists of a gaussian signal with tail with gaussian tail background """ try: - x_lo = np.nanmin(x) if lower_range == np.inf else lower_range - x_hi = np.nanmax(x) if upper_range == np.inf else upper_range sig = n_sig * ( - (1 - htail) * pgd.gaussian.get_pdf(x, mu, sigma) - + htail * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_sig) + (1 - htail) * pgf.gauss_norm(x, mu, sigma) + + htail + * pgf.gauss_tail_norm(x, mu, sigma, tau_sig, lower_range, upper_range) + ) + bkg = n_bkg * pgf.gauss_tail_norm( + x, mu, sigma, tau_bkg, lower_range, upper_range ) - bkg = n_bkg * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_bkg) except: sig = np.full_like(x, np.nan) bkg = np.full_like(x, np.nan) @@ -283,7 +283,7 @@ def guess(hist, bins, var, **kwargs): try: _, sigma, _ = pgh.get_gaussian_guess(hist, bins) except: - pars, cov = pgbf.gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( hist, bins, var, mode_guess=mu, n_bins=20 ) _, sigma, _ = pars @@ -375,9 +375,9 @@ def pdf( PDF for A/E consists of a gaussian signal with tail with gaussian tail background """ try: - x_lo = np.nanmin(x) if lower_range == np.inf else lower_range - x_hi = np.nanmax(x) if upper_range == np.inf else upper_range - sig = n_events * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_bkg) + sig = n_events * pgf.gauss_tail_norm( + x, mu, sigma, tau_bkg, lower_range, upper_range + ) except: sig = np.full_like(x, np.nan) @@ -406,7 +406,7 @@ def guess(hist, bins, var, **kwargs): try: _, sigma, _ = pgh.get_gaussian_guess(hist, bins) except: - pars, cov = pgbf.gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( hist, 
bins, var, mode_guess=mu, n_bins=20 ) _, sigma, _ = pars @@ -469,7 +469,7 @@ def pdf(x: np.array, n_events: float, mu: float, sigma: float) -> np.array: PDF for A/E consists of a gaussian signal with tail with gaussian tail background """ try: - sig = n_events * pgd.gaussian.get_pdf(x, mu, sigma) + sig = n_events * pgf.gauss_norm(x, mu, sigma) except: sig = np.full_like(x, np.nan) @@ -489,7 +489,7 @@ def guess(hist, bins, var, **kwargs): try: _, sigma, _ = pgh.get_gaussian_guess(hist, bins) except: - pars, cov = pgbf.gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( hist, bins, var, mode_guess=mu, n_bins=20 ) _, sigma, _ = pars @@ -540,10 +540,8 @@ def pdf( tau2, components, ): - gauss1 = n_sig1 * pgd.gauss_on_exgauss.get_pdf(x, mu1, sigma1, htail1, tau1) - gauss2 = n_sig2 * pgd.gauss_on_exgauss.get_pdf( - x, mu2, sigma2, tau2, htail2 - ) # NOTE: are tau2 and htail2 in the intended order? + gauss1 = n_sig1 * pgf.gauss_with_tail_pdf(x, mu1, sigma1, htail1, tau1) + gauss2 = n_sig2 * pgf.gauss_with_tail_pdf(x, mu2, sigma2, tau2, htail2) if components is True: return gauss1, gauss2 else: @@ -609,7 +607,7 @@ def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: mu1 = bcs[mus[0]] mu2 = bcs[mus[-1]] - pars, cov = pgbf.gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( hist, bins, var=None, @@ -622,7 +620,7 @@ def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: mu1, sigma1, amp = pars ix = np.where(bcs < mu1 + 3 * sigma1)[0][-1] n_sig1 = np.sum(hist[:ix]) - pars2, cov2 = pgbf.gauss_mode_width_max( + pars2, cov2 = pgf.gauss_mode_width_max( hist, bins, var=None, @@ -911,7 +909,7 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): """ Simple guess for peak fitting """ - if func_i == pgd.hpge_peak.pdf_ext: + if func_i == pgf.extended_radford_pdf: bin_cs = (bins[1:] + bins[:-1]) / 2 sigma = eres / 2.355 i_0 = np.nanargmax(hist) @@ -934,8 +932,6 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): if nsig_guess < 0: nsig_guess = 0 parguess = [ - fit_range[0], - fit_range[1], nsig_guess, mu, sigma, @@ -952,7 +948,7 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): parguess[i] = 0 return parguess - elif func_i == pgd.gauss_on_step.pdf_ext: + elif func_i == pgf.extended_gauss_step_pdf: mu = peak sigma = eres / 2.355 i_0 = np.argmax(hist) @@ -1001,7 +997,7 @@ def unbinned_energy_fit( energy, dx=0.5, range=(np.nanmin(energy), np.nanmax(energy)) ) except ValueError: - pars, errs, cov = return_nans(pgd.hpge_peak.get_pdf) + pars, errs, cov = return_nans(pgf.radford_pdf) return pars, errs sigma = eres / 2.355 if guess is None: @@ -1009,33 +1005,32 @@ def unbinned_energy_fit( hist, bins, var, - pgd.gauss_on_step.pdf_ext, + pgf.extended_gauss_step_pdf, peak, eres, (np.nanmin(energy), np.nanmax(energy)), ) - c = cost.ExtendedUnbinnedNLL(energy, pgd.gauss_on_step.pdf_ext) + c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_gauss_step_pdf) m = Minuit(c, *x0) m.limits = [ - (None, None), - (None, None), (0, 2 * np.sum(hist)), (peak - 1, peak + 1), (0, None), (0, 2 * np.sum(hist)), (-1, 1), + (None, None), + (None, None), + (None, None), ] - m.fixed[:2] = True + m.fixed[-3:] = True m.simplex().migrad() m.hesse() - x0 = m.values[:5] - x0 += [0.2, 0.2 * m.values[4]] - x0 += m.values[5:] + x0 = m.values[:3] + x0 += [0.2, 0.2 * m.values[2]] + x0 += m.values[3:] if verbose: print(m) bounds = [ - (None, None), - (None, None), (0, 2 * np.sum(hist)), (peak - 1, peak + 1), (0, None), @@ -1043,40 +1038,44 @@ 
def unbinned_energy_fit( (0, None), (0, 2 * np.sum(hist)), (-1, 1), + (None, None), + (None, None), + (None, None), ] - fixed = [0, 1] + fixed = [7, 8, 9] else: x0 = guess x1 = energy_guess( hist, bins, var, - pgd.hpge_peak.pdf_ext, + pgf.extended_radford_pdf, peak, eres, (np.nanmin(energy), np.nanmax(energy)), ) - x0[2] = x1[2] - x0[7] = x1[7] + x0[0] = x1[0] + x0[5] = x1[5] bounds = [ - (None, None), - (None, None), (0, 2 * np.sum(hist)), - (guess[3] - 0.5, guess[3] + 0.5), + (guess[1] - 0.5, guess[1] + 0.5), + sorted((0.8 * guess[2], 1.2 * guess[2])), + sorted((0.8 * guess[3], 1.2 * guess[3])), sorted((0.8 * guess[4], 1.2 * guess[4])), - sorted((0.8 * guess[5], 1.2 * guess[5])), - sorted((0.8 * guess[6], 1.2 * guess[6])), (0, 2 * np.sum(hist)), - sorted((0.8 * guess[8], 1.2 * guess[8])), + sorted((0.8 * guess[6], 1.2 * guess[6])), + (None, None), + (None, None), + (None, None), ] - fixed = [0, 1, 3, 4, 5, 6, 8] + fixed = [1, 2, 3, 4, 6, 7, 8, 9] if len(x0) == 0: - pars, errs, cov = return_nans(pgd.hpge_peak.pdf_ext) + pars, errs, cov = return_nans(pgf.extended_radford_pdf) return pars, errs if verbose: print(x0) - c = cost.ExtendedUnbinnedNLL(energy, pgd.hpge_peak.pdf_ext) + c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_radford_pdf) m = Minuit(c, *x0) m.limits = bounds for fix in fixed: @@ -1093,20 +1092,20 @@ def unbinned_energy_fit( plt.figure() bcs = (bins[1:] + bins[:-1]) / 2 plt.step(bcs, hist, where="mid") - plt.plot(bcs, pgd.hpge_peak.get_pdf(bcs, *x0) * np.diff(bcs)[0]) - plt.plot(bcs, pgd.hpge_peak.get_pdf(bcs, *m.values) * np.diff(bcs)[0]) + plt.plot(bcs, pgf.radford_pdf(bcs, *x0) * np.diff(bcs)[0]) + plt.plot(bcs, pgf.radford_pdf(bcs, *m.values) * np.diff(bcs)[0]) plt.show() - if not np.isnan(m.errors[2:]).all(): + if not np.isnan(m.errors[:-3]).all(): return m.values, m.errors else: try: m.simplex().migrad() m.minos() - if not np.isnan(m.errors[2:]).all(): + if not np.isnan(m.errors[:-3]).all(): return m.values, m.errors except: - pars, errs, cov = return_nans(pgd.hpge_peak.pdf_ext) + pars, errs, cov = return_nans(pgf.extended_radford_pdf) return pars, errs diff --git a/src/pygama/pargen/cuts.py b/src/pygama/pargen/cuts.py index 6aa3ebd44..638199f64 100644 --- a/src/pygama/pargen/cuts.py +++ b/src/pygama/pargen/cuts.py @@ -16,8 +16,8 @@ from scipy import stats import pygama.math.histogram as pgh +import pygama.math.peak_fitting as pgf import pygama.pargen.energy_cal as pgc -from pygama.math.binned_fitting import gauss_mode_width_max log = logging.getLogger(__name__) @@ -124,7 +124,7 @@ def generate_cuts( fwhm = pgh.get_fwhm(counts, bins)[0] mean = float(bin_centres[np.argmax(counts)]) - pars, cov = gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( counts, bins, mode_guess=mean, diff --git a/src/pygama/pargen/data_cleaning.py b/src/pygama/pargen/data_cleaning.py index 4cd573e78..4a1ceb93e 100644 --- a/src/pygama/pargen/data_cleaning.py +++ b/src/pygama/pargen/data_cleaning.py @@ -10,10 +10,7 @@ import numpy as np from scipy import stats -from pygama.math.binned_fitting import * -from pygama.math.functions.crystal_ball import nb_crystal_ball_scaled_pdf -from pygama.math.functions.gauss import nb_gauss -from pygama.math.histogram import * +from pygama.math.peak_fitting import * def gaussian_cut(data, cut_sigma=3, plotAxis=None): @@ -32,11 +29,11 @@ def gaussian_cut(data, cut_sigma=3, plotAxis=None): bin_centers = bins[:-1] + (bins[1] - bins[0]) / 2 # fit gaussians to that - # result = fit_unbinned(nb_gauss, hist, [median, width/2] ) + # result = 
fit_unbinned(gauss, hist, [median, width/2] ) # print("unbinned: {}".format(result)) result = fit_binned( - nb_gauss, + gauss, hist, bin_centers, [median, width / 2, np.amax(hist) * (width / 2) * np.sqrt(2 * np.pi)], @@ -47,7 +44,7 @@ def gaussian_cut(data, cut_sigma=3, plotAxis=None): if plotAxis is not None: plotAxis.plot(bin_centers, hist, ls="steps-mid", color="k", label="data") - fit = nb_gauss(bin_centers, *result) + fit = gauss(bin_centers, *result) plotAxis.plot(bin_centers, fit, label="gaussian fit") plotAxis.axvline(result[0], color="g", label="fit mean") plotAxis.axvline(cut_lo, color="r", label=f"+/- {cut_sigma} sigma") @@ -74,19 +71,15 @@ def xtalball_cut(data, cut_sigma=3, plotFigure=None): bin_centers = bins[:-1] + (bins[1] - bins[0]) / 2 # fit gaussians to that - # result = fit_unbinned(nb_gauss, hist, [median, width/2] ) + # result = fit_unbinned(gauss, hist, [median, width/2] ) # print("unbinned: {}".format(result)) p0 = get_gaussian_guess(hist, bin_centers) bounds = [ - (p0[2] * 0.2, p0[0] * 0.5, p0[1] * 0.5, 0, 1), - (p0[2] * 5, p0[0] * 1.5, p0[1] * 1.5, np.inf, np.inf), + (p0[0] * 0.5, p0[1] * 0.5, p0[2] * 0.2, 0, 1), + (p0[0] * 1.5, p0[1] * 1.5, p0[2] * 5, np.inf, np.inf), ] result = fit_binned( - nb_crystal_ball_scaled_pdf, - hist, - bin_centers, - [p0[2], p0[0], p0[1], 10, 1], - bounds=bounds, + xtalball, hist, bin_centers, [p0[0], p0[1], p0[2], 10, 1], bounds=bounds ) # print("binned: {}".format(result)) cut_lo = result[0] - cut_sigma * result[1] @@ -95,7 +88,7 @@ def xtalball_cut(data, cut_sigma=3, plotFigure=None): if plotFigure is not None: plt.figure(plotFigure.number) plt.plot(bin_centers, hist, ls="steps-mid", color="k", label="data") - fit = nb_crystal_ball_scaled_pdf(bin_centers, *result) + fit = xtalball(bin_centers, *result) plt.plot(bin_centers, fit, label="xtalball fit") plt.axvline(result[0], color="g", label="fit mean") plt.axvline(cut_lo, color="r", label=f"+/- {cut_sigma} sigma") diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 9c4dbc4ae..e3526c63a 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -25,9 +25,8 @@ from matplotlib.colors import LogNorm from scipy.optimize import curve_fit -import pygama.math.binned_fitting as pgbf -import pygama.math.distributions as pgd import pygama.math.histogram as pgh +import pygama.math.peak_fitting as pgf import pygama.pargen.cuts as cts import pygama.pargen.energy_cal as cal from pygama.pargen.utils import load_data, return_nans @@ -143,24 +142,24 @@ class calibrate_parameter: (60, 60), ] # side bands width funcs = [ - # pgd.gauss_on_step.pdf_ext, - pgd.hpge_peak.pdf_ext, - pgd.hpge_peak.pdf_ext, - pgd.hpge_peak.pdf_ext, - pgd.gauss_on_step.pdf_ext, - pgd.gauss_on_step.pdf_ext, - pgd.gauss_on_step.pdf_ext, - pgd.hpge_peak.pdf_ext, + # pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, ] gof_funcs = [ - # pgd.gauss_on_step.get_pdf, - pgd.hpge_peak.get_pdf, - pgd.hpge_peak.get_pdf, - pgd.hpge_peak.get_pdf, - pgd.gauss_on_step.get_pdf, - pgd.gauss_on_step.get_pdf, - pgd.gauss_on_step.get_pdf, - pgd.gauss_on_step.get_pdf, + # pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, ] def __init__( @@ -963,7 +962,7 @@ def get_peak_labels( if i % 2 == 1: continue else: - 
out.append(f"{pgd.nb_poly(label, pars):.1f}") + out.append(f"{pgf.poly(label, pars):.1f}") out_labels.append(label) return out_labels, out @@ -1004,13 +1003,13 @@ def plot_fits( fitted_gof_funcs.append(ecal_class.gof_funcs[i]) mus = [ - func_i.get_mu(pars_i) if pars_i is not None else np.nan + pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_pars) ] fig = plt.figure() derco = np.polyder(np.poly1d(ecal_class.pars)).coefficients - der = [pgd.nb_poly(5, derco) for Ei in fitted_peaks] + der = [pgf.poly(5, derco) for Ei in fitted_peaks] for i, peak in enumerate(mus): range_adu = 5 / der[i] plt.subplot(nrows, ncols, i + 1) @@ -1260,12 +1259,12 @@ def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 27 fitted_peaks = np.array(fitted_peaks)[valid_fits] mus = [ - func_i.get_mu(pars_i) if pars_i is not None else np.nan + pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_pars) ] mu_errs = [ - func_i.get_mu(pars_i) if pars_i is not None else np.nan + pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_errs) ] @@ -1280,16 +1279,16 @@ def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 27 ax1.scatter(fitted_peaks, mus, marker="x", c="b") - ax1.plot(pgd.nb_poly(cal_bins, ecal_class.pars), cal_bins, lw=1, c="g") + ax1.plot(pgf.poly(cal_bins, ecal_class.pars), cal_bins, lw=1, c="g") ax1.grid() ax1.set_xlim([erange[0], erange[1]]) ax1.set_ylabel("Energy (ADC)") ax2.errorbar( fitted_peaks, - pgd.nb_poly(np.array(mus), ecal_class.pars) - fitted_peaks, - yerr=pgd.nb_poly(np.array(mus) + np.array(mu_errs), ecal_class.pars) - - pgd.nb_poly(np.array(mus), ecal_class.pars), + pgf.poly(np.array(mus), ecal_class.pars) - fitted_peaks, + yerr=pgf.poly(np.array(mus) + np.array(mu_errs), ecal_class.pars) + - pgf.poly(np.array(mus), ecal_class.pars), linestyle=" ", marker="x", c="b", diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 254a3f0b5..a9de8e6d3 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -15,9 +15,8 @@ from iminuit import Minuit, cost from scipy.signal import find_peaks_cwt, medfilt -import pygama.math.binned_fitting as pgbf -import pygama.math.distributions as pgd import pygama.math.histogram as pgh +import pygama.math.peak_fitting as pgf import pygama.math.utils as pgu from pygama.pargen.utils import return_nans @@ -160,7 +159,7 @@ def hpge_get_E_peaks( imaxes = get_i_local_maxima(hist / np.sqrt(var), n_sigma) # Keep maxes if they coincide with expected peaks - test_peaks_keV = np.asarray([pgd.nb_poly(i, cal_pars) for i in bins[imaxes]]) + test_peaks_keV = np.asarray([pgf.poly(i, cal_pars) for i in bins[imaxes]]) imatch = [abs(peaks_keV - i).min() < Etol_keV for i in test_peaks_keV] got_peak_locations = bins[imaxes[imatch]] @@ -230,7 +229,7 @@ def hpge_fit_E_peak_tops( cov_list = [] for E_peak in peak_locs: try: - pars, cov = pgbf.gauss_mode_width_max( + pars, cov = pgf.gauss_mode_width_max( hist, bins, var, @@ -260,9 +259,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): The function to be fit to the peak in the (windowed) hist """ if ( - func == pgd.gauss_on_step.get_cdf - or func == pgd.gauss_on_step.get_pdf - or func == pgd.gauss_on_step.pdf_ext + func == pgf.gauss_step_cdf + or func == pgf.gauss_step_pdf + or func == pgf.extended_gauss_step_pdf ): # get mu and height from a 
gauss fit, also sigma as fallback pars, cov = pgf.gauss_mode_width_max( @@ -329,9 +328,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): return parguess if ( - func == pgd.hpge_peak.get_cdf - or func == pgd.hpge_peak.get_pdf - or func == pgd.hpge_peak.pdf_ext + func == pgf.radford_cdf + or func == pgf.radford_pdf + or func == pgf.extended_radford_pdf ): # guess mu, height pars, cov = pgf.gauss_mode_width_max( @@ -396,11 +395,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): ) n_bkg = np.sum(hist) - n_sig - parguess = [bins[0], bins[-1], n_sig, mu, sigma, htail, tau, n_bkg, hstep] - - for i, guess in enumerate(parguess): - if np.isnan(guess): - parguess[i] = 0 + parguess = [n_sig, mu, sigma, htail, tau, n_bkg, hstep, bins[0], bins[-1], 0] for i, guess in enumerate(parguess): if np.isnan(guess): @@ -419,21 +414,21 @@ def get_hpge_E_fixed(func): """ if ( - func == pgd.gauss_on_step.get_cdf - or func == pgd.gauss_on_step.get_pdf - or func == pgd.gauss_on_step.pdf_ext + func == pgf.gauss_step_cdf + or func == pgf.gauss_step_pdf + or func == pgf.extended_gauss_step_pdf ): - # pars are: x_lo, x_hi, n_sig, mu, sigma, n_bkg, hstep - return [0, 1], np.array([False, False, True, True, True, True, True]) + # pars are: n_sig, mu, sigma, n_bkg, hstep, components + return [5, 6, 7], np.array([True, True, True, True, True, False, False, False]) if ( - func == pgd.hpge_peak.get_cdf - or func == pgd.hpge_peak.get_pdf - or func == pgd.hpge_peak.pdf_ext + func == pgf.radford_cdf + or func == pgf.radford_pdf + or func == pgf.extended_radford_pdf ): - # pars are: x_lo, x_hi, n_sig, mu, sigma, htail, tau, n_bkg, hstep - return [0, 1], np.array( - [False, False, True, True, True, True, True, True, True] + # pars are: n_sig, mu, sigma, htail,tau, n_bkg, hstep, components + return [7, 8, 9], np.array( + [True, True, True, True, True, True, True, False, False, False] ) else: @@ -444,9 +439,9 @@ def get_hpge_E_fixed(func): def get_hpge_E_bounds(func, parguess): if ( - func == pgd.hpge_peak.get_cdf - or func == pgd.hpge_peak.get_pdf - or func == pgd.hpge_peak.pdf_ext + func == pgf.radford_cdf + or func == pgf.radford_pdf + or func == pgf.extended_radford_pdf ): return [ (0, None), @@ -456,12 +451,15 @@ def get_hpge_E_bounds(func, parguess): (None, None), (0, None), (-1, 1), + (None, None), + (None, None), + (None, None), ] elif ( - func == pgd.gauss_on_step.get_cdf - or func == pgd.gauss_on_step.get_pdf - or func == pgd.gauss_on_step.pdf_ext + func == pgf.gauss_step_cdf + or func == pgf.gauss_step_pdf + or func == pgf.extended_gauss_step_pdf ): return [ (0, None), @@ -469,6 +467,9 @@ def get_hpge_E_bounds(func, parguess): (0, None), (0, None), (-1, 1), + (None, None), + (None, None), + (None, None), ] else: @@ -597,7 +598,7 @@ def hpge_fit_E_peaks( mode_guesses, wwidths, n_bins=50, - funcs=pgd.gauss_on_step.get_cdf, + funcs=pgf.gauss_step_cdf, method="unbinned", gof_funcs=None, n_events=None, @@ -740,7 +741,7 @@ def hpge_fit_E_peaks( par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) bounds = get_hpge_E_bounds(func_i, par_guesses) fixed, mask = get_hpge_E_fixed(func_i) - pars_i, errs_i, cov_i = pgbf.fit_binned( + pars_i, errs_i, cov_i = pgf.fit_binned( func_i, hist, bins, @@ -963,7 +964,7 @@ def hpge_E_calibration( deg=0, uncal_is_int=False, range_keV=None, - funcs=pgd.gauss_on_step.get_cdf, + funcs=pgf.gauss_step_cdf, gof_funcs=None, method="unbinned", gof_func=None, @@ -1139,18 +1140,18 @@ def hpge_E_calibration( n_bins = 50 elif np.isscalar(range_keV): derco = 
@@ -1139,18 +1140,18 @@ def hpge_E_calibration(
         n_bins = 50
     elif np.isscalar(range_keV):
         derco = np.polyder(np.poly1d(roughpars)).coefficients
-        der = [pgd.nb_poly(Ei, derco) for Ei in got_peaks_keV]
+        der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV]
         range_uncal = [float(range_keV) / d for d in der]
         n_bins = [int(range_keV / 0.5 / d) for d in der]
     elif isinstance(range_keV, tuple):
         rangeleft_keV, rangeright_keV = range_keV
         derco = np.polyder(np.poly1d(roughpars)).coefficients
-        der = [pgd.nb_poly(Ei, derco) for Ei in got_peaks_keV]
+        der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV]
         range_uncal = [(rangeleft_keV / d, rangeright_keV / d) for d in der]
         n_bins = [int(sum(range_keV) / 0.5 / d) for d in der]
     elif isinstance(range_keV, list):
         derco = np.polyder(np.poly1d(roughpars)).coefficients
-        der = [pgd.nb_poly(Ei, derco) for Ei in got_peaks_keV]
+        der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV]
         range_uncal = [
             (r[0] / d, r[1] / d) if isinstance(r, tuple) else r / d
             for r, d in zip(range_keV, der)
@@ -1245,12 +1246,11 @@ def hpge_E_calibration(
         pgf.get_fwhm_func(func_i, pars_i, cov=covs_i)
         for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs)
     ]
-    uncal_fwhms, uncal_fwhm_errs = zip(*uncal_fwhms)
     uncal_fwhms = np.asarray(uncal_fwhms)
     uncal_fwhm_errs = np.asarray(uncal_fwhm_errs)
     derco = np.polyder(np.poly1d(pars)).coefficients
-    der = [pgd.nb_poly(Ei, derco) for Ei in fitted_peaks_keV]
+    der = [pgf.poly(Ei, derco) for Ei in fitted_peaks_keV]
     cal_fwhms = uncal_fwhms * der
     cal_fwhms_errs = uncal_fwhm_errs * der
@@ -1356,7 +1356,7 @@ def poly_match(xx, yy, deg=-1, rtol=1e-5, atol=1e-8):
         pars_i = np.polyfit(xx_i, yy_i, deg)
         polxx = np.zeros(len(yy_i))
         xxn = np.ones(len(yy_i))
-        polxx = pgd.nb_poly(xx_i, pars_i)
+        polxx = pgf.poly(xx_i, pars_i)
 
     # by here we have the best polxx. Search for matches and store pars_i if
     # its the best so far
@@ -1684,37 +1684,25 @@ def calibrate_tl208(energy_series, cal_peaks=None, plotFigure=None):
         # if inp == "q": exit()
 
         bounds = (
+            [0.9 * guess_e, 0.5 * guess_sigma, 0, 0, 0, 0, 0],
             [
-                bin_centers[0],
-                bin_centers[-1],
-                0,
-                0.9 * guess_e,
-                0.5 * guess_sigma,
-                0,
-                0,
-                0,
-                0,
-            ],
-            [
-                bin_centers[0],
-                bin_centers[-1],
-                5 * guess_area,
                 1.1 * guess_e,
                 2 * guess_sigma,
+                0.1,
                 0.75,
                 window_width_in_adc,
                 10,
-                0.1,
+                5 * guess_area,
             ],
         )
         params = fit_binned(
-            hpge_peak.get_pdf,
+            radford_peak,
             peak_hist,
             bin_centers,
-            [guess_area, guess_e, guess_sigma, 0.7, 5, 0, 1e-3],
+            [guess_e, guess_sigma, 1e-3, 0.7, 5, 0, guess_area],
         )  # bounds=bounds)
 
-        plt.plot(bin_centers, hpge_peak.get_pdf(bin_centers, *params), color="r")
+        plt.plot(bin_centers, radford_peak(bin_centers, *params), color="r")
 
         # inp = input("q to quit...")
         # if inp == "q": exit()
@@ -1746,7 +1734,7 @@ def calibrate_tl208(energy_series, cal_peaks=None, plotFigure=None):
             ls="steps-mid",
             color="k",
         )
-        fit = hpge_peak.get_pdf(bin_centers, *params)
+        fit = radford_peak(bin_centers, *params)
         ax_peak.plot(
             bin_centers * rough_kev_per_adc + rough_kev_offset, fit, color="b"
         )
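# [editor's note -- not part of the patch] Sketch of the keV-to-ADC window
# conversion in the @@ -1139 hunk above: a fit window given in keV is divided
# by the local derivative dE/dADC of the rough calibration polynomial at each
# peak, giving the window in uncalibrated units and a bin count of ~0.5 keV
# per bin. All values below are assumptions for illustration only.
import numpy as np

roughpars = [0.25, 0.0]  # assumed rough calibration: E[keV] = 0.25 * adc
got_peaks_keV = np.array([583.2, 1592.5, 2614.5])
range_keV = 10.0

derco = np.polyder(np.poly1d(roughpars)).coefficients  # dE/dADC coefficients
der = [np.polyval(derco, e) for e in got_peaks_keV]    # slope at each peak
range_uncal = [float(range_keV) / d for d in der]      # 10 keV -> 40 ADC here
n_bins = [int(range_keV / 0.5 / d) for d in der]       # ~0.5 keV per bin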
diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py
index 8e565e9b7..905d126f0 100644
--- a/src/pygama/pargen/energy_optimisation.py
+++ b/src/pygama/pargen/energy_optimisation.py
@@ -28,10 +28,8 @@
 from sklearn.gaussian_process.kernels import RBF, ConstantKernel
 from sklearn.utils._testing import ignore_warnings
 
-import pygama.math.binned_fitting as pgbf
-import pygama.math.distributions as pgd
 import pygama.math.histogram as pgh
-import pygama.math.hpge_peak_fitting as pghpf
+import pygama.math.peak_fitting as pgf
 import pygama.pargen.cuts as cts
 import pygama.pargen.dsp_optimize as opt
 import pygama.pargen.energy_cal as pgc
@@ -187,7 +185,7 @@ def simple_guess(hist, bins, var, func_i, fit_range):
     """
     Simple guess for peak fitting
     """
-    if func_i == pgd.hpge_peak.pdf_ext:
+    if func_i == pgf.extended_radford_pdf:
         bin_cs = (bins[1:] + bins[:-1]) / 2
         _, sigma, amp = pgh.get_gaussian_guess(hist, bins)
         i_0 = np.nanargmax(hist)
@@ -204,8 +202,6 @@ def simple_guess(hist, bins, var, func_i, fit_range):
         nsig_guess = np.sum(hist[i_0 - n_bins_range : i_0 + n_bins_range])
         nbkg_guess = np.sum(hist) - nsig_guess
         parguess = [
-            fit_range[0],
-            fit_range[1],
             nsig_guess,
             mu,
             sigma,
@@ -213,10 +209,13 @@ def simple_guess(hist, bins, var, func_i, fit_range):
             tau,
             nbkg_guess,
             hstep,
-        ]
+            fit_range[0],
+            fit_range[1],
+            0,
+        ]  #
         return parguess
 
-    elif func_i == pgd.gauss_on_step.pdf_ext:
+    elif func_i == pgf.extended_gauss_step_pdf:
         mu, sigma, amp = pgh.get_gaussian_guess(hist, bins)
         i_0 = np.argmax(hist)
         bg = np.mean(hist[-10:])
@@ -226,7 +225,7 @@ def simple_guess(hist, bins, var, func_i, fit_range):
         n_bins_range = int((4 * sigma) // dx)
         nsig_guess = np.sum(hist[i_0 - n_bins_range : i_0 + n_bins_range])
         nbkg_guess = np.sum(hist) - nsig_guess
-        return [fit_range[0], fit_range[1], nsig_guess, mu, sigma, nbkg_guess, hstep]
+        return [nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0]
 
 
 def unbinned_energy_fit(
@@ -253,24 +252,24 @@ def unbinned_energy_fit(
     )
     bin_cs1 = (bins[:-1] + bins[1:]) / 2
     if guess is not None:
-        x0 = [*guess[:-2], fit_range[0], fit_range[1]]
+        x0 = [*guess[:-2], fit_range[0], fit_range[1], False]
     else:
-        if func == pgd.hpge_peak.pdf_ext:
-            x0 = simple_guess(hist1, bins, var, pgd.gauss_on_step.pdf_ext, fit_range)
+        if func == pgf.extended_radford_pdf:
+            x0 = simple_guess(hist1, bins, var, pgf.extended_gauss_step_pdf, fit_range)
             if verbose:
                 print(x0)
-            c = cost.ExtendedUnbinnedNLL(energy, pgd.gauss_on_step.pdf_ext)
+            c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_gauss_step_pdf)
             m = Minuit(c, *x0)
-            m.fixed[:2] = True
+            m.fixed[-3:] = True
             m.simplex().migrad()
             m.hesse()
             if guess is not None:
-                x0_rad = [fit_range[0], fit_range[1], *guess[2:]]
+                x0_rad = [*guess[:-2], fit_range[0], fit_range[1], False]
             else:
                 x0_rad = simple_guess(hist1, bins, var, func, fit_range)
-            x0 = m.values[:5]
-            x0 += x0_rad[5:7]
-            x0 += m.values[5:]
+            x0 = m.values[:3]
+            x0 += x0_rad[3:5]
+            x0 += m.values[3:]
         else:
             x0 = simple_guess(hist1, bins, var, func, fit_range)
         if verbose:
@@ -279,7 +278,7 @@ def unbinned_energy_fit(
     m = Minuit(c, *x0)
     if tol is not None:
         m.tol = tol
-    m.fixed[:2] = True
+    m.fixed[-3:] = True
     m.migrad()
     m.hesse()
 
@@ -291,17 +290,17 @@ def unbinned_energy_fit(
     m_fit = func(bin_cs1, *m.values)[1]
     valid1 = (
         m.valid
         # & m.accurate
         & (~np.isnan(m.errors).any())
-        & (~(np.array(m.errors[2:]) == 0).all())
+        & (~(np.array(m.errors[:-3]) == 0).all())
     )
-    cs = pgbf.goodness_of_fit(
-        hist, bins, None, gof_func, m.values[2:], method="Pearson"
+    cs = pgf.goodness_of_fit(
+        hist, bins, None, gof_func, m.values[:-3], method="Pearson"
     )
     cs = cs[0] / cs[1]
     m2 = Minuit(c, *x0)
     if tol is not None:
         m2.tol = tol
-    m2.fixed[:2] = True
+    m2.fixed[-3:] = True
     m2.simplex().migrad()
     m2.hesse()
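# [editor's note -- not part of the patch] The m.fixed changes above freeze
# the nuisance entries of the parameter vector before minimisation: in the
# old ordering the fit window and `components` flag sit at the back
# (m.fixed[-3:]); in the new ordering the window sits at the front
# (m.fixed[:2]). A toy sketch with iminuit; the Gaussian model is an
# assumption standing in for the real peak shape.
import numpy as np
from iminuit import Minuit, cost

rng = np.random.default_rng(seed=1)
data = rng.normal(100.0, 2.0, size=1000)

def pdf_ext(x, n_sig, mu, sigma, x_lo, x_hi, components):
    # extended pdf, old convention: shape pars first, window + flag last
    density = n_sig * np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
    return n_sig, density

c = cost.ExtendedUnbinnedNLL(data, pdf_ext)
m = Minuit(c, 1000.0, 99.0, 1.5, data.min(), data.max(), 0)
m.fixed[-3:] = True  # freeze x_lo, x_hi, components, as in the old API
m.migrad()
print(m.values[:3])  # fitted n_sig, mu, sigma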
@@ -309,16 +308,16 @@ def unbinned_energy_fit(
     m2_fit = func(bin_cs1, *m2.values)[1]
     valid2 = (
         m2.valid
         # & m2.accurate
         & (~np.isnan(m.errors).any())
-        & (~(np.array(m2.errors[2:]) == 0).all())
+        & (~(np.array(m2.errors[:-3]) == 0).all())
     )
-    cs2 = pgbf.goodness_of_fit(
-        hist, bins, None, gof_func, m2.values[2:], method="Pearson"
+    cs2 = pgf.goodness_of_fit(
+        hist, bins, None, gof_func, m2.values[:-3], method="Pearson"
     )
     cs2 = cs2[0] / cs2[1]
 
-    frac_errors1 = np.sum(np.abs(np.array(m.errors)[2:] / np.array(m.values)[2:]))
-    frac_errors2 = np.sum(np.abs(np.array(m2.errors)[2:] / np.array(m2.values)[2:]))
+    frac_errors1 = np.sum(np.abs(np.array(m.errors)[:-3] / np.array(m.values)[:-3]))
+    frac_errors2 = np.sum(np.abs(np.array(m2.errors)[:-3] / np.array(m2.values)[:-3]))
 
     if verbose:
         print(m)
@@ -341,21 +340,21 @@ def unbinned_energy_fit(
         m = Minuit(c, *x0)
         if tol is not None:
             m.tol = tol
-        m.fixed[:2] = True
+        m.fixed[-3:] = True
         m.limits = pgc.get_hpge_E_bounds(func)
         m.simplex().simplex().migrad()
         m.hesse()
         if verbose:
             print(m)
-        cs = pgbf.goodness_of_fit(
-            hist, bins, None, gof_func, m.values[:-2], method="Pearson"
+        cs = pgf.goodness_of_fit(
+            hist, bins, None, gof_func, m.values[:-3], method="Pearson"
         )
         cs = cs[0] / cs[1]
         valid3 = (
             m.valid
             # & m.accurate
             & (~np.isnan(m.errors).any())
-            & (~(np.array(m.errors[2:]) == 0).all())
+            & (~(np.array(m.errors[:-3]) == 0).all())
         )
         if valid3 is False:
             try:
@@ -363,7 +362,7 @@ def unbinned_energy_fit(
                 valid3 = (
                     m.valid
                     & (~np.isnan(m.errors).any())
-                    & (~(np.array(m.errors[2:]) == 0).all())
+                    & (~(np.array(m.errors[:-3]) == 0).all())
                 )
             except:
                 raise RuntimeError
@@ -375,25 +374,25 @@ def unbinned_energy_fit(
 
     elif valid2 == False or cs * 1.05 < cs2:
         pars = np.array(m.values)[:-1]
-        errs = np.array(m.errors)[:-2]
+        errs = np.array(m.errors)[:-3]
         cov = np.array(m.covariance)[:-1, :-1]
         csqr = cs
 
     elif valid1 == False or cs2 * 1.05 < cs:
         pars = np.array(m2.values)[:-1]
-        errs = np.array(m2.errors)[:-2]
+        errs = np.array(m2.errors)[:-3]
         cov = np.array(m2.covariance)[:-1, :-1]
         csqr = cs2
 
     elif frac_errors1 < frac_errors2:
         pars = np.array(m.values)[:-1]
-        errs = np.array(m.errors)[:-2]
+        errs = np.array(m.errors)[:-3]
         cov = np.array(m.covariance)[:-1, :-1]
         csqr = cs
 
     elif frac_errors1 > frac_errors2:
         pars = np.array(m2.values)[:-1]
-        errs = np.array(m2.errors)[:-2]
+        errs = np.array(m2.errors)[:-3]
         cov = np.array(m2.covariance)[:-1, :-1]
         csqr = cs2
@@ -480,16 +479,14 @@ def get_peak_fwhm_with_dt_corr(
             guess=guess,
             tol=tol,
         )
-        if func == pgd.hpge_peak.pdf_ext:
+        if func == pgf.extended_radford_pdf:
             if energy_pars[3] < 1e-6 and energy_err[3] < 1e-6:
                 fwhm = energy_pars[2] * 2 * np.sqrt(2 * np.log(2))
                 fwhm_err = np.sqrt(cov[2][2]) * 2 * np.sqrt(2 * np.log(2))
             else:
-                fwhm = pghpf.hpge_peak_fwhm(
-                    energy_pars[2], energy_pars[3], energy_pars[4]
-                )
+                fwhm = pgf.radford_fwhm(energy_pars[2], energy_pars[3], energy_pars[4])
 
-        elif func == pgd.gauss_on_step.pdf_ext:
+        elif func == pgf.extended_gauss_step_pdf:
             fwhm = energy_pars[2] * 2 * np.sqrt(2 * np.log(2))
             fwhm_err = np.sqrt(cov[2][2]) * 2 * np.sqrt(2 * np.log(2))
 
@@ -507,18 +504,18 @@ def get_peak_fwhm_with_dt_corr(
 
         yerr_boot = np.nanstd(y_max, axis=0)
 
-        if func == pgd.hpge_peak.pdf_ext and not (
+        if func == pgf.extended_radford_pdf and not (
            energy_pars[3] < 1e-6 and energy_err[3] < 1e-6
        ):
            y_b = np.zeros(len(par_b))
            for i, p in enumerate(par_b):
                try:
-                    y_b[i] = pghpf.hpge_peak_fwhm(p[2], p[3], p[4])  #
+                    y_b[i] = pgf.radford_fwhm(p[2], p[3], p[4])  #
                except:
                    y_b[i] = np.nan
            fwhm_err = np.nanstd(y_b, axis=0)
            if fwhm_err == 0:
-                fwhm, fwhm_err = pghpf.hpge_peak_fwhm(
+                fwhm, fwhm_err = pgf.radford_fwhm(
                    energy_pars[2],
                    energy_pars[3],
                    energy_pars[4],
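# [editor's note -- not part of the patch] The FWHM fallback above uses the
# Gaussian identity FWHM = 2*sqrt(2*ln 2)*sigma (about 2.3548*sigma) whenever
# the tail fraction (energy_pars[3]) is negligible, propagating only the
# sigma variance into the error. Minimal numerical check; the fit values
# below are assumptions.
import numpy as np

sigma, sigma_var = 1.20, 0.01**2   # assumed fitted sigma and its variance
k = 2 * np.sqrt(2 * np.log(2))     # ~ 2.35482
fwhm = k * sigma
fwhm_err = k * np.sqrt(sigma_var)  # linear propagation: d(FWHM)/d(sigma) = k
print(f"FWHM = {fwhm:.4f} +/- {fwhm_err:.4f}")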
diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py
index e7b02e0fa..61e833994 100644
--- a/src/pygama/pargen/extract_tau.py
+++ b/src/pygama/pargen/extract_tau.py
@@ -19,8 +19,8 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-import pygama.math.binned_fitting as pgbf
 import pygama.math.histogram as pgh
+import pygama.math.peak_fitting as pgf
 import pygama.pargen.cuts as cts
 import pygama.pargen.dsp_optimize as opt
 import pygama.pargen.energy_optimisation as om
@@ -99,7 +99,7 @@ def get_decay_constant(
     bin_centres = pgh.get_bin_centers(bins)
     high_bin = bin_centres[np.argmax(counts)]
     try:
-        pars, cov = pgbf.gauss_mode_width_max(
+        pars, cov = pgf.gauss_mode_width_max(
             counts,
             bins,
             n_bins=10,
@@ -162,7 +162,7 @@ def fom_dpz(tb_data, verbosity=0, rand_arg=None):
     max_idx = np.argmax(counts)
     mu = start_bins[max_idx]
     try:
-        pars, cov = pgbf.gauss_mode_width_max(
+        pars, cov = pgf.gauss_mode_width_max(
             counts,
             start_bins,
             mode_guess=mu,
diff --git a/src/pygama/pargen/mse_psd.py b/src/pygama/pargen/mse_psd.py
index 9b158f8b5..11a78f65b 100644
--- a/src/pygama/pargen/mse_psd.py
+++ b/src/pygama/pargen/mse_psd.py
@@ -8,8 +8,8 @@
 import numpy as np
 from matplotlib.colors import LogNorm
 
-from pygama.math.distributions import *
 from pygama.math.histogram import get_bin_centers
+from pygama.math.peak_fitting import *
 
 
 def get_avse_cut(e_cal, current, plotFigure=None):
@@ -59,17 +59,17 @@ def get_avse_cut(e_cal, current, plotFigure=None):
 
         p0 = get_gaussian_guess(h, a_bins_cent)
         fit_idxs = a_bins_cent > p0[0] - 5 * p0[1]
-        p = fit_binned(nb_gauss, h[fit_idxs], a_bins_cent[fit_idxs], p0)
+        p = fit_binned(gauss, h[fit_idxs], a_bins_cent[fit_idxs], p0)
         y_max[i] = p[0]
 
         # plt.plot(a_bins_cent,h,ls="steps")
         # plt.axvline(a_mode, c="r")
         # plt.title("Energy: {} keV".format(e_cent[i]))
         #
-        # fit = nb_gauss(a_bins_cent[fit_idxs], *p)
+        # fit = gauss(a_bins_cent[fit_idxs], *p)
         # plt.plot(a_bins_cent[fit_idxs], fit, c="g")
 
-        # guess = nb_gauss(a_bins_cent[fit_idxs], *p0)
+        # guess = gauss(a_bins_cent[fit_idxs], *p0)
         # plt.plot(a_bins_cent[fit_idxs], guess, c="r")
 
         # inp = input("q to quit")
@@ -99,8 +99,8 @@ def get_avse_cut(e_cal, current, plotFigure=None):
     h_bgs = h_dep - h_bg
     # fit AvsE peak to gaussian to get the 90% cut
     p0 = get_gaussian_guess(h_bgs, bin_centers)
-    p = fit_binned(nb_gauss, h_bgs, bin_centers, p0)
-    fit = nb_gauss(bin_centers, *p)
+    p = fit_binned(gauss, h_bgs, bin_centers, p0)
+    fit = gauss(bin_centers, *p)
     ae_mean, ae_std = p[0], p[1]
     ae_cut = p[0] - 1.28 * p[1]  # cuts at 10% of CDF
@@ -250,8 +250,8 @@ def get_ae_cut(e_cal, current, plotFigure=None):
     h_bgs = h_dep - h_bg
 
     p0 = get_gaussian_guess(h_bgs, bin_centers)
-    p = fit_binned(nb_gauss, h_bgs, bin_centers, p0)
-    fit = nb_gauss(bin_centers, *p)
+    p = fit_binned(gauss, h_bgs, bin_centers, p0)
+    fit = gauss(bin_centers, *p)
     ae_mean, ae_std = p[0], p[1]
     ae_cut = p[0] - 1.28 * p[1]  # cuts at 10% of CDF

From 2da52fce1dd5121e7e25aa10da564762ad351540 Mon Sep 17 00:00:00 2001
From: SamuelBorden
Date: Wed, 6 Mar 2024 23:16:09 -0800
Subject: [PATCH 191/191] added boost-histogram to dependencies

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 0b196a424..b4edbf1ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ classifiers = [
 ]
 requires-python = ">=3.9"
 dependencies = [
+    "boost-histogram",
     "colorlog",
     "dspeed>=1.3",
     "h5py>=3.2",
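# [editor's note -- not part of the patch] A minimal usage sketch of the
# boost-histogram dependency added in the final patch above; the specific use
# inside pygama is not shown in this patch series, and the axis range below
# is an assumption for illustration only.
import boost_histogram as bh
import numpy as np

h = bh.Histogram(bh.axis.Regular(3000, 0, 3000))  # 1 keV bins, 0-3000 keV
h.fill(np.random.default_rng(0).normal(2614.5, 2.5, size=10_000))
print(h.sum(), h.axes[0].centers[2614])           # total counts, one bin centre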