diff --git a/configs/peakfit/tune0.yml b/configs/peakfit/tune0.yml
index 5cebbacf..a624b189 100644
--- a/configs/peakfit/tune0.yml
+++ b/configs/peakfit/tune0.yml
@@ -76,13 +76,13 @@ fit:
 techno:
   rng_seed: 1234
   
-  loss_type: 'chi2'            # 'chi2', 'huber', 'nll'
+  loss_type: 'nll'             # 'chi2', 'huber', 'nll'
   use_limits: True             # Use parameter bounds
   zerobin: 0.1                 # Minimum number of counts in a valid bin
   
   huber_delta: 1.0             # 'huber' loss threshold parameter
   
-  # Use enough calls / iterations (otherwise can get stuck in bad local minima)
+  # Use enough 'trials' (otherwise can get stuck in bad local minima)
   ncall_scipy_simplex: 0       # Number of scipy search calls
   ncall_mystic_diffev2: 1      # Number of Mystic solver calls
   
@@ -97,7 +97,8 @@ techno:
   
   min_ndof: 1     # Minimum number of d.o.f required
   max_chi2: 1200  # Maximum chi2/ndf for a succesful fit
   min_count: 5    # Minimum number of total entries (sum of bin content) in the histogram
-  max_trials: 3   # Number of restarts
+  trials: 10      # Number of random restarts (all are executed and inspected, the best is kept)
+  rand_sigma: 0.5 # Random perturbation scale for initial values per trial
   
   set_to_nan: True # Set parameters after the fit to NaN if not passing max_chi2 or min_count
diff --git a/configs/peakfit/tune2.yml b/configs/peakfit/tune2.yml
index 6f3d422b..8f645012 100644
--- a/configs/peakfit/tune2.yml
+++ b/configs/peakfit/tune2.yml
@@ -67,13 +67,13 @@ fit:
 techno:
   rng_seed: 1234
   
-  loss_type: 'chi2'            # 'chi2', 'huber', 'nll'
+  loss_type: 'nll'             # 'chi2', 'huber', 'nll'
   use_limits: True             # Use parameter bounds
   zerobin: 0.1                 # Minimum number of counts in a valid bin
   
   huber_delta: 1.0             # 'huber' loss threshold parameter
   
-  # Use enough calls / iterations (otherwise can get stuck in bad local minima)
+  # Use enough 'trials' (otherwise can get stuck in bad local minima)
   ncall_scipy_simplex: 0       # Number of scipy search calls
   ncall_mystic_diffev2: 1      # Number of Mystic solver calls
   
@@ -83,16 +83,17 @@ techno:
   ncall_minuit_simplex: 0      # Number of Nelder-Mead simplex search calls
   ncall_minuit_gradient: 10000 # Number of Minuit gradient search calls
   
-  minos: True     # If True, Minuit MINOS uncertainties (slower but best), otherwise HESSE
+  minos: True      # If True, Minuit MINOS uncertainties (slower but best), otherwise HESSE
   
-  min_ndof: 1     # Minimum number of d.o.f required
-  max_chi2: 1200  # Maximum chi2/ndf for a succesful fit
-  min_count: 5    # Minimum number of total entries (sum of bin content) in the histogram
-  max_trials: 3   # Number of restarts
-  
-  set_to_nan: True # Set parameters after the fit to NaN if not passing max_chi2 or min_count
+  min_ndof: 1      # Minimum number of d.o.f required
+  max_chi2: 1200   # Maximum chi2/ndf for a successful fit
+  min_count: 5     # Minimum number of total entries (sum of bin content) in the histogram
+  trials: 10       # Number of random restarts (all are executed and inspected, the best is kept)
+  rand_sigma: 0.5  # Random perturbation scale for initial values per trial
+  
+  set_to_nan: True # Set parameters after the fit to NaN if not passing max_chi2 or min_count
   
-  strategy: 1     # Default 1
-  tol: 0.1        # Default 0.1, https://iminuit.readthedocs.io/en/stable/reference.html#iminuit.Minuit.tol
+  strategy: 1      # Default 1
+  tol: 0.1         # Default 0.1, https://iminuit.readthedocs.io/en/stable/reference.html#iminuit.Minuit.tol
   
-  cov_eps: 0.0    # Covariance matrix post-regularization (added to diagonal) (set to 0 for none, can bias!)
+  cov_eps: 0.0     # Covariance matrix post-regularization (added to diagonal) (set to 0 for none, can bias!)
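
For context, a minimal sketch of the restart scheme that the new 'trials' and 'rand_sigma' keys configure (illustration only, not part of the patch; the start values, bounds, and stand-in loss below are hypothetical, and the actual loop lives in icefit/icepeak.py in the next diff):

    import numpy as np

    techno = {'trials': 10, 'rand_sigma': 0.5}  # values as in tune0.yml / tune2.yml
    start  = np.array([1.0, 0.1, 5.0])          # hypothetical start values
    limits = np.array([[0.0, 2.0], [0.0, 1.0], [0.0, 10.0]])  # hypothetical bounds

    best_loss, best_x = np.inf, None
    for trial in range(techno['trials']):
        if trial == 0:
            x0 = start.copy()  # first trial starts from the unperturbed defaults
        else:
            # Additive Gaussian perturbation of scale 'rand_sigma', clipped to bounds
            x0 = np.clip(start + techno['rand_sigma'] * np.random.randn(len(start)),
                         limits[:, 0], limits[:, 1])
        loss = float(np.sum((x0 - 0.5)**2))     # stand-in for the real fit loss
        if best_x is None or loss < best_loss:  # every trial runs; the best one is kept
            best_loss, best_x = loss, x0
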
diff --git a/icefit/icepeak.py b/icefit/icepeak.py
index c6de805b..c584277f 100644
--- a/icefit/icepeak.py
+++ b/icefit/icepeak.py
@@ -413,14 +413,15 @@ def make_positive_semi_definite(cov_matrix: np.ndarray, epsilon=1e-6):
         Adjusted covariance matrix
     """
     
-    new_cov = np.array(cov_matrix)
-    min_eigenvalue = np.min(np.linalg.eigvalsh(new_cov))
+    # Check eigenvalues of the matrix
+    min_eigenvalue = np.min(np.linalg.eigvalsh(cov_matrix))
     
-    if min_eigenvalue < 0:
-        new_cov -= min_eigenvalue * np.eye(new_cov.shape[0])
-        new_cov += epsilon * np.eye(new_cov.shape[0])
+    # Only adjust if the matrix has negative eigenvalues or if we need to add epsilon
+    if min_eigenvalue < 0 or epsilon > 0:
+        adjustment = max(0, -min_eigenvalue + epsilon)
+        cov_matrix = cov_matrix + adjustment * np.eye(cov_matrix.shape[0])
     
-    return new_cov
+    return cov_matrix
 
 def get_ndf(fitbin_mask: np.ndarray, par: np.ndarray, fit_type: str):
     """
@@ -485,7 +486,6 @@ def binned_1D_fit(hist: dict, param: dict, fitfunc: dict, techno: dict, par_fixe
     Returns:
         par, cov, var2pos
     """
-    print(__name__ + f'.binned_1D_fit:')
     
     h = {}
     for key in hist.keys():
@@ -595,57 +595,68 @@ def poiss_nll_loss(par):
         return nll
     
     # --------------------------------------------------------------------
+    loss_type = techno['loss_type']
     
     cprint(__name__ + f".binned_1D_fit: Executing fit with loss_type: '{loss_type}'", 'magenta')
     
     if   loss_type == 'chi2':
-        loss = chi2_loss
+        lossfunc = chi2_loss
     elif loss_type == 'huber':
-        loss = huber_loss
+        lossfunc = huber_loss
    elif loss_type == 'nll':
-        loss = poiss_nll_loss
+        lossfunc = poiss_nll_loss
     else:
         raise Exception(f'Unknown loss_type chosen <{loss_type}>')
     
+    # --------------------------------------------------------------------
+    # Optimization loop
+    
+    best_m1    = None
+    best_loss  = 1e20
+    best_trial = None
     trials = 0
-    
-    while True:
+    
+    while trials < techno['trials']:
         
         # Execute optimizers
-        m1 = optimizer_execute(trials=trials, loss=loss, param=param, techno=techno)
-        
-        ### Collect output
-        par     = m1.values
-        cov     = m1.covariance
-        var2pos = m1.var2pos
-        chi2    = chi2_loss(par)
-        
-        ndof = 0
-        for key in fitbin_mask.keys():
-            ndof += np.sum(fitbin_mask[key])
-        ndof -= len(par) # num_data - num_param
+        m1 = optimizer_execute(trials=trials, loss=lossfunc, param=param, techno=techno)
+        
+        if (m1.fval < best_loss) or (trials == 0):
+            best_m1    = copy.deepcopy(m1)
+            best_loss  = m1.fval
+            best_trial = trials
         
         trials += 1
-        
-        if (chi2 / ndof < techno['max_chi2']):
-            break
-        
-        # Could improve this logic (save the best trial if all are ~ weak, recover that)
-        elif trials == techno['max_trials']:
-            break
-    
-    print(f'Parameters: {par}')
-    print(f'Covariance: {cov}')
-    
-    # --------------------------------------------------------------------
-    # Improve numerical stability
+    
+    # Collect values
+    
+    var2pos = best_m1.var2pos
+    par     = best_m1.values
+    cov     = best_m1.covariance
+    chi2    = chi2_loss(par)
+    
+    # Calculate total DOF over each histogram (handles both single and dual fits)
+    ndof = sum(np.sum(mask) for mask in fitbin_mask.values()) - len(par)
+    # -------------------------------
+    
+    print('')
+    cprint(f'Best trial number: {best_trial} | Loss: {best_loss:0.3E}')
+    cprint(f'Valid minimum: {best_m1.valid} | Accurate covariance: {best_m1.accurate}', 'magenta')
+    print('')
+    cprint(f'Parameters: {par}')
+    cprint(f'Covariance: {cov}')
+    
+    # --------------------------------------------------------------------
+    # Improve numerical stability of the covariance matrix
+    
     if cov is not None and techno['cov_eps'] > 0:
         cov = make_positive_semi_definite(cov, epsilon=techno['cov_eps'])
     
     # --------------------------------------------------------------------
-    ## Inspect special edge cases
+    # Inspect special edge cases
     par, cov = edge_cases(par=par, cov=cov, techno=techno,
                           num_counts_in_fit=num_counts_in_fit, chi2=chi2, ndof=ndof)
@@ -696,13 +707,16 @@ def optimizer_execute(trials, loss, param, techno):
     Optimizer execution wrapper
     """
     
+    cprint(__name__ + f'.optimizer_execute: Optimization Trial {trials}', 'red')
+    
     if trials == 0:
         start_values = param['start_values']
     else:
         # Randomly perturb around the default starting point and clip
         start_values = param['start_values']
         for i in range(len(start_values)):
-            start_values[i] = np.clip(start_values[i] + 0.2 * start_values[i] * np.random.randn(), param['limits'][i][0], param['limits'][i][1])
+            start_values[i] = np.clip(start_values[i] + techno['rand_sigma'] * np.random.randn(),
+                                      param['limits'][i][0], param['limits'][i][1])
     
     # ------------------------------------------------------------
     # Nelder-Mead search from scipy
@@ -725,15 +739,13 @@ def optimizer_execute(trials, loss, param, techno):
     
     # Differential evolution 2 solver
     if techno['ncall_mystic_diffev2'] > 0:
-        x0 = start_values
-        start_values = diffev2(loss, x0=x0, bounds=bounds)
+        start_values = diffev2(loss, x0=start_values, bounds=bounds)
         cprint(f'Mystic diffev2 solution: {start_values}', 'green')
     
     # Fmin-Powell solver
     if techno['ncall_mystic_fmin_powell'] > 0:
-        x0 = start_values
-        start_values = fmin_powell(loss, x0=x0, bounds=bounds)
+        start_values = fmin_powell(loss, x0=start_values, bounds=bounds)
         cprint(f'Mystic fmin_powell solution: {start_values}', 'green')
     
     # --------------------------------------------------------------------
@@ -776,19 +788,6 @@ def optimizer_execute(trials, loss, param, techno):
         m1.simplex(ncall=techno['ncall_minuit_simplex'])
         print(m1.fmin)
     
-    # --------------------------------------------------------------------
-    # Raytune
-    """
-    values = m1.values
-    param_new = copy.deepcopy(param)
-    
-    for i in range(len(param_new['start_values'])):
-        param_new['limits'][i] = [values[i]-1, values[i]+1]
-    
-    out = raytune_main(param=param_new, loss_func=loss)
-    """
-    # --------------------------------------------------------------------
-    
     # Minuit Gradient search
     m1.migrad(ncall=techno['ncall_minuit_gradient'])
     print(m1.fmin)
@@ -844,10 +843,6 @@ def analyze_1D_fit(hist, param: dict, techno: dict, fitfunc,
     # --------------------------------------------------------------------
     ## Create fit functions
     
-    # Samples on x-axis between [first_edge, ..., last_edge]
-    #index = np.where(range_mask)[0]
-    #x = np.linspace(np.min(bin_edges[index]), np.max(bin_edges[index+1]), int(nsamples))
-    
     # Samples on x-axis between [fit central value, ..., last central value]
     # ** This should be consistent with fitfunc trapz normalization **
     x = np.linspace(np.min(cbins[range_mask]), np.max(cbins[range_mask]), int(nsamples))
@@ -1480,94 +1475,3 @@ def fitfunc_fail(x, par, par_fixed=None, components=['S', 'B']):
     
     else:
         raise Exception('get_fit_functions: Unknown fit_type chosen')
-
-
-"""
-# Raytune
-from ray import tune
-from ray.tune.suggest.hyperopt import HyperOptSearch
-from ray.tune import CLIReporter
-from ray.tune.schedulers import ASHAScheduler
-from functools import partial
-import multiprocessing
-import torch
-"""
-
-"""
-def raytune_main(param, loss_func=None, inputs={}, num_samples=20, max_num_epochs=20):
-    #
-    # Raytune mainloop
-    #
-    def raytune_loss(p, **args):
-        #
-        # Loss Wrapper
-        #
-        par_arr = np.zeros(len(param['name']))
-        i = 0
-        for key in param['name']:
-            par_arr[i] = p[key]
-            i += 1
-        
-        loss = loss_func(par_arr)
-        
-        yield {'loss': loss}
-    
-    
-    ### Construct hyperparameter config (setup) from yaml
-    config = {}
-    i = 0
-    for key in param['name']:
-        config[key] = tune.uniform(param['limits'][i][0], param['limits'][i][1])
-        i += 1
-    
-    
-    # Raytune basic metrics
-    reporter = CLIReporter(metric_columns = ["loss", "training_iteration"])
-    
-    # Raytune search algorithm
-    metric = 'loss'
-    mode   = 'min'
-    
-    # Hyperopt Bayesian /
-    search_alg = HyperOptSearch(metric=metric, mode=mode)
-    
-    # Raytune scheduler
-    scheduler = ASHAScheduler(
-        metric           = metric,
-        mode             = mode,
-        max_t            = max_num_epochs,
-        grace_period     = 1,
-        reduction_factor = 2)
-    
-    # Raytune main setup
-    analysis = tune.run(
-        partial(raytune_loss, **inputs),
-        search_alg          = search_alg,
-        resources_per_trial = {"cpu": multiprocessing.cpu_count(), "gpu": 1 if torch.cuda.is_available() else 0},
-        config              = config,
-        num_samples         = num_samples,
-        scheduler           = scheduler,
-        progress_reporter   = reporter)
-    
-    # Get the best config
-    best_trial = analysis.get_best_trial(metric=metric, mode=mode, scope="last")
-    
-    print(f'raytune: Best trial config: {best_trial.config}', 'green')
-    print(f'raytune: Best trial final validation loss: {best_trial.last_result["loss"]}', 'green')
-    #cprint(f'raytune: Best trial final validation chi2: {best_trial.last_result["chi2"]}', 'green')
-    
-    # Get the best config
-    config = best_trial.config
-    
-    # Load the optimal values for the given hyperparameters
-    optimal_param = np.zeros(len(param['name']))
-    i = 0
-    for key in param['name']:
-        optimal_param[i] = config[key]
-        i += 1
-    
-    print('Best parameters:')
-    print(optimal_param)
-    
-    return optimal_param
-"""
diff --git a/icefit/peakfit.py b/icefit/peakfit.py
index a5eb5432..2f9b146e 100644
--- a/icefit/peakfit.py
+++ b/icefit/peakfit.py
@@ -38,7 +38,7 @@ import ray
 
 
-__VERSION__ = 0.03
+__VERSION__ = 0.04
 __AUTHOR__ = 'm.mieskolainen@imperial.ac.uk'
 
 # ========================================================================
diff --git a/tests/test_peakfit.sh b/tests/test_peakfit.sh
index 5f5c8f96..90955f8a 100644
--- a/tests/test_peakfit.sh
+++ b/tests/test_peakfit.sh
@@ -5,7 +5,7 @@
 git clone https://github.com/mieskolainen/actions-stash.git
 
 for L in chi2 huber nll; do
-    for T in single dual dual-unitary-I dual-unitary-II; do
+    for T in single dual; do
         python icefit/peakfit.py --num_cpus 1 --test_mode --analyze --group --loss_type "$L" --fit_type "$T" --output_name "test_${L}_${T}" > "peakfit_${L}_${T}.log"
     done
done
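
As a standalone sanity check of the revised make_positive_semi_definite in icefit/icepeak.py: the function body below mirrors the '+' lines of the patch, while the 2x2 test matrix is an illustrative assumption. For a negative minimum eigenvalue the diagonal shift max(0, -lambda_min + epsilon) matches the old behavior; the practical change is that a nonnegative lambda_min smaller than epsilon is now also lifted up to epsilon.

    import numpy as np

    def make_positive_semi_definite(cov_matrix: np.ndarray, epsilon=1e-6):
        # Smallest eigenvalue of the symmetric input matrix
        min_eigenvalue = np.min(np.linalg.eigvalsh(cov_matrix))

        # Shift the diagonal only as much as needed to lift lambda_min to ~epsilon
        if min_eigenvalue < 0 or epsilon > 0:
            adjustment = max(0, -min_eigenvalue + epsilon)
            cov_matrix = cov_matrix + adjustment * np.eye(cov_matrix.shape[0])

        return cov_matrix

    C = np.array([[1.0, 2.0],
                  [2.0, 1.0]])       # indefinite: eigenvalues -1 and 3
    C_psd = make_positive_semi_definite(C)
    print(np.linalg.eigvalsh(C_psd)) # ~ [1e-06, 4.000001] -> positive definite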