From d3fac0af0efe62b5beabadfc43f578de9de09f47 Mon Sep 17 00:00:00 2001 From: Mikael Mieskolainen Date: Sat, 2 Nov 2024 08:09:08 +0000 Subject: [PATCH] extend Poisson loss 2 --- icefit/icepeak.py | 48 ++++++++++++++++++++++++++++--------------- icefit/peakfit.py | 10 ++++++--- tests/test_peakfit.sh | 11 ++++++++++ 3 files changed, 49 insertions(+), 20 deletions(-) create mode 100644 tests/test_peakfit.sh diff --git a/icefit/icepeak.py b/icefit/icepeak.py index a2785aeb..c6de805b 100644 --- a/icefit/icepeak.py +++ b/icefit/icepeak.py @@ -515,7 +515,7 @@ def binned_1D_fit(hist: dict, param: dict, fitfunc: dict, techno: dict, par_fixe ### [Chi2 loss function] def chi2_loss(par): - total = 0 + tot = 0 # Over histograms for key in counts.keys(): @@ -529,14 +529,14 @@ def chi2_loss(par): residual = (y_pred[fmask] - counts[key][rmask][fmask]) / errors[key][rmask][fmask] - total += np.sum(residual**2) + tot += np.sum(residual**2) - return total + return tot ### [Huber loss function] def huber_loss(par): - total = 0 + tot = 0 delta = techno['huber_delta'] # Over histograms @@ -552,14 +552,15 @@ def huber_loss(par): T = huber_lossfunc(y_true=counts[key][rmask][fmask], y_pred=y_pred[fmask], sigma=errors[key][rmask][fmask], delta=delta) - total += T + tot += T - return total + return tot - ### [Poissonian negative log-likelihood loss function] + ### [Poissonian negative delta log-likelihood loss function] def poiss_nll_loss(par): - total = 0 + EPS = 1e-9 + nll = 0 # Over histograms for key in counts.keys(): @@ -571,16 +572,27 @@ def poiss_nll_loss(par): # ** Note use x=cbins[range_mask] here, due to trapz integral in fitfunc ! ** y_pred = fitfunc[key](cbins[key][rmask], par, par_fixed) - valid = (y_pred > 0) & fmask + valid = (y_pred > 0) if np.sum(valid) == 0: return 1e9 - T1 = counts[key][rmask][valid] * np.log(y_pred[valid]) - T2 = y_pred[valid] - - total += (-1)*(np.sum(T1) - np.sum(T2)) - - return total - + y_pred[~valid] = EPS + + # Bohm-Zech scale transform for weighted events (https://arxiv.org/abs/1309.1287) + # https://scikit-hep.org/iminuit/notebooks/weighted_histograms.html + s = counts[key][rmask][fmask] / (errors[key][rmask][fmask]**2) # Per bin + + n = counts[key][rmask][fmask] # Observed + mu = y_pred[fmask] # Predicted + + # Factor 2 x taken into account by setting `errordef = iminuit.Minuit.LIKELIHOOD` + + # Delta log-likelihood with Bohm-Zech scale + nll += np.sum( s * (n * (np.log(n) - np.log(mu)) - (n - mu)) ) + + # Simple log-likelihood (NB. x -1) + #nll += (-1) * np.sum(n * np.log(mu) - mu) + + return nll # -------------------------------------------------------------------- loss_type = techno['loss_type'] @@ -739,9 +751,11 @@ def optimizer_execute(trials, loss, param, techno): m1.fixed[k] = param['fixed'][k] # -------------------------------------------------------------------- - if techno['loss_type'] == 'nll': + if 'nll' in techno['loss_type']: + cprint('Setting errordef "LIKELIHOOD" ', 'yellow') m1.errordef = iminuit.Minuit.LIKELIHOOD else: + cprint('Setting errordef "LEAST_SQUARES" ', 'yellow') m1.errordef = iminuit.Minuit.LEAST_SQUARES # Set parameter bounds diff --git a/icefit/peakfit.py b/icefit/peakfit.py index a4f1cd24..a5eb5432 100644 --- a/icefit/peakfit.py +++ b/icefit/peakfit.py @@ -6,8 +6,12 @@ # - Keep all pdf functions normalized in the steering yml (norm: True), # otherwise fit stability problems and uncertainty estimation is not consistent. # -# - Use 'chi2' loss if using weighted event histograms (either MC or data) -# Use 'nll' for unweighted Poisson count histograms (both MC and data) +# - Use 'chi2' or 'huber' loss if using weighted event histograms (either MC or data) +# Use 'nll' for unweighted Poisson count histograms +# and a weighted count histograms via a scale transform (experimental) +# +# - For different fit types see: /docs/pdf/peakfit.pdf +# # # Run with: python icefit/peakfit.py --analyze --group (--test_mode) # @@ -34,7 +38,7 @@ import ray -__VERSION__ = 0.02 +__VERSION__ = 0.03 __AUTHOR__ = 'm.mieskolainen@imperial.ac.uk' # ======================================================================== diff --git a/tests/test_peakfit.sh b/tests/test_peakfit.sh new file mode 100644 index 00000000..5f5c8f96 --- /dev/null +++ b/tests/test_peakfit.sh @@ -0,0 +1,11 @@ +#!/bin/sh +# +# Test peakfit + +git clone https://github.com/mieskolainen/actions-stash.git + +for L in chi2 huber nll; do + for T in single dual dual-unitary-I dual-unitary-II; do + python icefit/peakfit.py --num_cpus 1 --test_mode --analyze --group --loss_type "$L" --fit_type "$T" --output_name "test_${L}_${T}" > "peakfit_${L}_${T}.log" + done +done