From 689d770aae256df330d7dcfb87a71abc2661282d Mon Sep 17 00:00:00 2001
From: Mikael Mieskolainen <mikael.mieskolainen@cern.ch>
Date: Fri, 12 Apr 2024 00:41:05 +0100
Subject: [PATCH] update plotting

---
 icefit/lognormal.py | 207 +++++++++++++++++++++++++-------------------
 iceplot/iceplot.py  |  21 +++--
 2 files changed, 133 insertions(+), 95 deletions(-)

diff --git a/icefit/lognormal.py b/icefit/lognormal.py
index 3caed01b..ff36a353 100644
--- a/icefit/lognormal.py
+++ b/icefit/lognormal.py
@@ -1,15 +1,14 @@
 # Log-normal density parameters & generation
 #
-# Run tests with: pytest lognormal.py -rP
+# Run with: python lognormal.py
 #
-# m.mieskolainen@imperial.ac.uk, 2023
+# m.mieskolainen@imperial.ac.uk, 2024
 
 import numpy as np
 import matplotlib.pyplot as plt
-from scipy import optimize
+from scipy.optimize import fsolve
 
-
-def lognormal_param(m, v, inmode='mean', verbose=False):
+def lognormal_param(m, v, match_mode='mean', verbose=True):
     """
     Compute log-normal distribution \mu and \sigma parameters from the target mean (or median) m and variance v
 
@@ -20,40 +19,47 @@
     Args:
         m : mean (or median or mode) value of the target log-normal pdf
         v : variance value of the target log-normal pdf
-        inmode : input m as 'mean', 'median', or 'mode'
+        match_mode : input m as 'mean', 'median', 'mode', or 'geometric'
     """
 
-    if inmode == 'mean':
+    if match_mode == 'mean':
         """ CLOSED FORM SOLUTION """
         mu = np.log(m**2/np.sqrt(v + m**2))
         sigma = np.sqrt(np.log(v/m**2 + 1))
 
-    elif inmode == 'median':
+    elif match_mode == 'median':
         """ CLOSED FORM SOLUTION """
         mu = np.log(m)
         sigma = np.sqrt(np.log((np.sqrt(m**2 + 4*v)/m + 1)/2))
 
-    elif inmode == 'mode':
+    elif match_mode == 'mode':
         """ NUMERICAL SOLUTION """
-        def fun(x):
+
+        def mode_func(x):
             d = np.array([m - np.exp(x[0] - x[1]), v - (np.exp(x[1]) - 1)*np.exp(2*x[0] + x[1])])
-            return np.sum(d**2)
-
-        sol = optimize.minimize(fun=fun, x0=[1, 1], method='nelder-mead')
-        mu = sol['x'][0]
-        sigma = np.sqrt(sol['x'][1])
-
+            return d
+
+        init_guess = (np.log(m), np.log(1 + np.sqrt(v)/m))
+        sol = fsolve(mode_func, init_guess)
+        mu = sol[0]
+        sigma = np.sqrt(sol[1])
+
+    elif match_mode == 'geometric':
+        """ CLOSED FORM SOLUTION """
+        mu = np.log(m)       # m is median and also geometric mean
+        sigma = np.log(1 + np.sqrt(v)/m)  # from log(s_G), where s_G = exp(STD(log(X)))
+
     else:
-        raise Except('lognormal_param: Unknown inmode')
+        raise Exception(f'lognormal_param: Unknown match_mode {match_mode}')
 
     if verbose:
         print(f'lognormal_param:')
-        print(f' - Input: m = {m:0.5f}, v = {v:0.5f}, target = {inmode}')
-        print(f' - Obtained parameters: mu = {mu:0.5f}, sigma = {sigma:0.5f}')
-
+        print(f' - Input: m = {m:0.3f}, v = {v:0.3f}, sqrt[v] = {np.sqrt(v):0.3f}, sqrt[v] / m = {np.sqrt(v)/m:0.3f}, target mode = {match_mode}')
+        print(f' - Obtained parameters: mu = {mu:0.3f}, sigma = {sigma:0.3f}, exp(sigma) = {np.exp(sigma):0.3f}')
+
     return mu, sigma
 
-def rand_lognormal(m, v, N, inmode='mean'):
+def rand_lognormal(m, v, N, match_mode='mean'):
     """
     Generate random numbers from log-normal density with
     the distribution target mean (or median) m and variance v
@@ -62,131 +68,158 @@
         m : mean (or median or mode) value of the target log-normal pdf
         v : variance value of the target log-normal pdf
         N : number of samples
-        inmode : input m as 'mean', 'median', or 'mode'
+        match_mode : input m as 'mean', 'median', 'mode', or 'geometric'
     """
-    mu,sigma = lognormal_param(m=m, v=v, inmode=inmode)
+    mu,sigma = lognormal_param(m=m, v=v, match_mode=match_mode)
 
     # Standard transform
     y = np.exp(mu + sigma * np.random.randn(N))
     return y
 
-
-def rand_powexp(b, sigma, N):
+def rand_powexp(m, std, N):
     """
-    Power+exp type parametrization (positive definite)
+    Power+exp type parametrization for median matching
 
     Args:
-        sigma : standard deviation
+        m   : median target
+        std : standard deviation target
         N : number of samples
     """
     theta = np.random.randn(N) # theta ~ exp(-0.5 * sigma^2)
-    return b*(1 + sigma)**theta
+    return m*(1 + std/m)**theta
+
+def create_label(X):
+    mean = np.mean(X)
+    med = np.median(X)
+    std = np.std(X)
+    gsd = np.exp(np.std(np.log(X))) # Geometric standard deviation
+
+    # Find histogram mode
+    bins = np.linspace(0, 5*med, 300)
+    counts, xval = np.histogram(X, bins)
+    mode = xval[np.argmax(counts)]
+    return f'(mean,med,mode) = ({mean:0.1f}, {med:0.1f}, {mode:0.1f}), STD[X] = {std:0.2f}, GSD[X] = {gsd:0.2f}, std/mean = {std/mean:0.1f}, std/med = {std/med:0.1f}, std/mode = {std/mode:0.1f}'
 
 def test_lognormal():
     import pytest
-    N = int(1e6)
-
+    N = int(1e7)
+
     # Nominal value
-    b0 = 1.5
+    m0 = 1.5
 
     # Different relative uncertainties
-    delta_val = np.array([0.1, 0.4, 1.5])
-
+    delta_val = np.array([0.1, 0.4, 0.8, 1.6])
+
     # Different input modes
-    modes = ['mean', 'median', 'mode']
-
-    fig, ax = plt.subplots(nrows=len(delta_val), ncols=len(modes), figsize=(14,9))
+    modes  = ['mean', 'median', 'mode', 'geometric']
+    symbol = ['\\bar{{m}}', 'm', '\\tilde{{m}}', 'm']
+
+    fig, ax = plt.subplots(nrows=len(delta_val), ncols=len(modes), figsize=(20,15))
 
     for i in range(len(delta_val)):
-        delta = delta_val[i]
+
+        # log-normal pdf targets
+        delta = delta_val[i]
+        std = delta*m0
+
+        # 2. Generate power-approx formula distributed variables
+        Y = rand_powexp(m=m0, std=std, N=N)
+        Y_label = create_label(Y)
+
         for j in range(len(modes)):
 
             # 1. Generate log-normally distributed variables
-            sigma = delta*b0
-            b = rand_lognormal(m=b0, v=sigma**2, N=N, inmode=modes[j])
-
-            # Computete statistics
-            mean = np.mean(b)
-            med = np.median(b)
-            std = np.std(b)
-            # Find mode
-            bins = np.linspace(0, 5*b0, 300)
-            counts, xval = np.histogram(b, bins)
-            mode = xval[np.argmax(counts)]
-
-            label = f'b_0 = {b0:0.1f}, \delta = {delta:0.2f} :: (mean,med,mode) = ({mean:0.2f}, {med:0.2f}, {mode:0.2f}), std = {std:0.2f}, std/mean = {std/mean:0.2f}, std/med = {std/med:0.2f}, std/mode = {std/mode:0.2f}'
-            print(f'{label} \n')
+            X = rand_lognormal(m=m0, v=std**2, N=N, match_mode=modes[j])
+            X_label = create_label(X)
 
-            if (i == 0):
-                ax[i,j].set_title(f'<{modes[j]}> as the target $m$')
+            # Compute sample point statistics
+            print(f'** {modes[j]} matching: m_0 = {m0:0.1f}, \delta = {delta:0.2f} **')
+            print(f'{X_label} \n')
 
-            # 2. Generate power-exp distributed variables
-            c = rand_powexp(b=b0, sigma=sigma, N=N)
+            title = f'$({symbol[j]} = {m0:0.1f}, \sqrt{{v}}/{symbol[j]} = {delta:0.1f})$'
+            ax[i,j].set_title(f'<{modes[j]}> matching: {title}', fontsize=9)
 
-            ax[i,j].hist(b, bins, label=f'exact log-N: $m = {b0:0.2f}, \\sqrt{{v}} = {delta:0.2f}$', histtype='step')
-            ax[i,j].hist(c, bins, label=f'approx power: $s = {delta:0.2f}$', histtype='step')
+            # ---------------
 
-            ax[i,j].legend(fontsize=9, loc='lower right')
-            ax[i,j].set_xlim([0, b0*3])
-            ax[i,j].set_xlabel('$x$')
+            bins = np.linspace(0, 5*np.median(X), 200)
+
+            ax[i,j].hist(X, bins, label=f'E: {X_label}', histtype='step', density=True)
+            ax[i,j].hist(Y, bins, label=f'A: {Y_label}', histtype='step', density=True)
+
+            ax[i,j].legend(fontsize=4, loc='lower right')
+            ax[i,j].set_xlim([0, m0*2.5])
+            ax[i,j].set_ylim([0, None])
+
+            if i == len(delta_val) - 1:
+                ax[i,j].set_xlabel('$x$')
 
         print('----------------------------------------------------------')
 
-    plt.savefig('sampling.pdf', bbox_inches='tight')
-    plt.show()
-
+    figname = 'sampling.pdf'
+    print(f'Saving figure: {figname}')
+    plt.savefig(figname, bbox_inches='tight')
+    plt.close()
 
 def test_accuracy():
     """
     Test accuracy made in the power-exp approximation with median = 1
     """
-    def func_s(v, order=1):
-
-        A = np.sqrt(v)
+    def expanded_s(v, m, order=1):
+        if order >= 1:
+            A = np.sqrt(v)/m
         if order >= 2:
-            A += v/2
+            A += (1/2)*(v/m**2)
         if order >= 3:
-            A += -7*v**(3/2)/12
+            A += -(7/12)*(v**(3/2)/m**3)
         if order >= 4:
-            A += -17*v**2/24
+            A += -(17/24)*(v**2/m**4)
         if order >= 5:
-            A += 163*v**(5/2)/160
+            A += (163/160)*(v**(5/2)/m**5)
         if order >= 6:
-            A += 1111*v**3/720
+            A += (1111/720)*(v**3/m**6)
         if order >= 7:
-            A += -96487*v**(7/2)/40320
-
-        return A / (np.exp(np.sqrt(np.log(1/2 * (1 + np.sqrt(1 + 4*v))))) - 1)
+            A += -(96487/40320)*(v**(7/2)/m**7)
 
-
-    v = np.linspace(1e-9, 1.5**2, 10000)
+        return A
+
+    def exact_s(v, m):
+        return np.exp(np.sqrt(np.log(1/2 * (np.sqrt(4*v/m**2 + 1) + 1)))) - 1
+
+    # Fixed m value
+    m = 1.5
+    v = np.linspace(1e-9, m*4, 10000)
 
     fig, ax = plt.subplots()
-    plt.plot(np.sqrt(v), np.ones(len(v)), color=(0.5,0.5,0.5), ls='--')
+    plt.plot(np.sqrt(v) / m, np.ones(len(v)), color=(0.5,0.5,0.5), ls='--')
 
     for i in range(1,7+1):
         if i % 2:
-            txt = f'$O(v^{{{i:.0f}/2}})$'
+            txt = f'$O(v^{{{i:.0f}/2}} / m^{{{i:.0f}}})$'
         else:
-            txt = f'$O(v^{{{i/2:.0f}}})$'
-        plt.plot(np.sqrt(v), func_s(v=v, order=i), label=f'{txt}')
+            txt = f'$O(v^{{{i/2:.0f}}} / m^{{{i:.0f}}})$'
+        plt.plot(np.sqrt(v) / m, expanded_s(v=v, m=m, order=i) / exact_s(v=v, m=m), label=f'{txt}')
 
-    plt.ylabel('[Expansion of $s$ $|_{near \\;\\; v=0}$] / Exact $s$')
-    plt.xlabel('$\\sqrt{v}$')
+    plt.ylabel('[Expansion of $s$ $|_{near \\;\\; \\sqrt{v}/m=0}$] / Exact $s$')
+    plt.xlabel('$\\sqrt{v} / m$')
     plt.legend()
     plt.xlim([0, 1.5])
     plt.ylim([0.8, 1.5])
-    plt.savefig('series_approx.pdf', bbox_inches='tight')
-    plt.show()
-
+    figname = 'series_approx.pdf'
+    print(f'Saving figure: {figname}')
+    plt.savefig(figname, bbox_inches='tight')
+    plt.close()
 
+def main():
+    test_accuracy()
+    test_lognormal()
+
 
-#test_lognormal()
-#test_accuracy()
\ No newline at end of file
+if __name__ == "__main__":
+    main()
diff --git a/iceplot/iceplot.py b/iceplot/iceplot.py
index e0a7e38d..c670e1cc 100644
--- a/iceplot/iceplot.py
+++ b/iceplot/iceplot.py
@@ -85,6 +85,10 @@ def __init__(self, counts = 0, errs = 0, bins = 0, cbins = 0, binscale=1.0):
         else:
             self.is_empty = False
 
+    # Compute histogram integral (piece-wise differential sum)
+    def integral(self):
+        return np.sum(self.binscale * self.counts * binwidth(self.bins))
+
     # + operator
     def __add__(self, other):
 
@@ -100,7 +104,7 @@ def __add__(self, other):
         counts = self.counts + other.counts
         errs   = np.sqrt(self.errs**2 + other.errs**2)
 
-        return hobj(counts, errs, bins, cbins, binscale)
+        return hobj(counts, errs, self.bins, self.cbins, binscale)
 
     # += operator
     def __iadd__(self, other):
@@ -503,7 +507,7 @@ def histmc(mcdata, all_obs, density=False, scale=None, color=(0,0,1), label='non
     # Compute differential cross section within histogram range
     # Note that division by sum(weights) handles the histogram range integral (overflow) properly
     binscale = mcdata['xsection_pb'] / binwidth(bins) / np.sum(mcdata['weights'])
-    
+
     # Additional scale factor
     if scale is not None:
         binscale *= scale
@@ -516,6 +520,8 @@ def histmc(mcdata, all_obs, density=False, scale=None, color=(0,0,1), label='non
     obj[OBS] = {'hdata': hobj(counts, errs, bins, cbins, binscale),
         'hfunc' : 'hist', 'color': color, 'label': label, 'style' : style}
 
+    print(f'histmc: integral = {obj[OBS]["hdata"].integral():0.2E} ({OBS})')
+
     return obj
 
 
@@ -525,13 +531,11 @@ def histhepdata(hepdata, all_obs, scale=None, density=False, MC_XS_SCALE=1E12, l
     obj = {}
 
     for OBS in all_obs.keys():
-
-        # Over all DATA files (now fixed to one)
-        data_obj = []
-
+
         y        = hepdata[OBS]['y']
         yerr     = hepdata[OBS]['y_err']
         bins     = hepdata[OBS]['bins']
+        binwidth = hepdata[OBS]['binwidth']
        cbins    = hepdata[OBS]['x']
 
         binscale = hepdata[OBS]['scale'] * MC_XS_SCALE
@@ -542,14 +546,15 @@ def histhepdata(hepdata, all_obs, scale=None, density=False, MC_XS_SCALE=1E12, l
 
         # Density integral 1 over the histogram bins
         if density:
-            # note .mean(), each element is already differentially normalized
-            norm  = hepdata[OBS]['binwidth'].mean() * y.sum()
+            norm  = (y * binwidth).sum()
             y    /= norm
            yerr /= norm
             binscale = 1.0
 
         obj[OBS] = {'hdata': hobj(y, yerr, bins, cbins, binscale),
             'hfunc' : 'hist', 'color': (0,0,0), 'label': label, 'style' : style}
+
+        print(f'histhepdata: integral = {obj[OBS]["hdata"].integral():0.2E} ({OBS})')
 
     return obj
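
Usage sketch (not part of the patch itself): the snippet below only exercises the interface introduced above (lognormal_param, rand_lognormal, rand_powexp with the new match_mode argument). It assumes the repository root is on PYTHONPATH so that icefit.lognormal imports; the chosen numbers simply mirror the nominal value and relative uncertainties used in test_lognormal.

    import numpy as np
    from icefit.lognormal import lognormal_param, rand_lognormal, rand_powexp

    m0, delta = 1.5, 0.4            # target value and relative uncertainty
    v = (delta * m0)**2             # target variance

    # Closed-form / numerical (mu, sigma) for each matching convention
    for mode in ['mean', 'median', 'mode', 'geometric']:
        mu, sigma = lognormal_param(m=m0, v=v, match_mode=mode, verbose=False)
        print(f'{mode:9s}: mu = {mu:0.3f}, sigma = {sigma:0.3f}')

    # Exact log-normal sampling (median matching) vs. the power-exp approximation
    X = rand_lognormal(m=m0, v=v, N=100000, match_mode='median')
    Y = rand_powexp(m=m0, std=delta * m0, N=100000)
    print(f'median[X] = {np.median(X):0.3f}, median[Y] = {np.median(Y):0.3f}')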