float64 precision update; template for icezee
mieskolainen committed May 5, 2024
1 parent 70f192c commit 408a4ac
Showing 18 changed files with 671 additions and 20 deletions.
1 change: 1 addition & 0 deletions analysis/_icepaths_.py
@@ -10,6 +10,7 @@
'/iceid/',
'/icefit/',
'/icebrem/',
'/icezee/'
]

for p in paths:
19 changes: 19 additions & 0 deletions analysis/zee.py
@@ -0,0 +1,19 @@
# Zee steering code
#
# [email protected], 2024

import sys
sys.path.append(".")

# Configure plotting backend
import matplotlib
matplotlib.use('Agg')

from icenet.tools import process
from icezee import common

def main():
args = process.generic_flow(rootname='zee', func_loader=common.load_root_file, func_factor=common.splitfactor)

if __name__ == '__main__' :
main()
3 changes: 3 additions & 0 deletions configs/zee/__init__.py
@@ -0,0 +1,3 @@
#
#
#
15 changes: 15 additions & 0 deletions configs/zee/cuts.py
@@ -0,0 +1,15 @@
# Basic kinematic fiducial cuts; use only variables available in real data.
#
# [email protected], 2024

import numpy as np
import numba
import matplotlib.pyplot as plt

from icenet.tools import stx


def cut_nocut(X, ids, isMC, xcorr_flow=False):
""" No cuts """
return np.ones(X.shape[0], dtype=np.bool_) # Note datatype np.bool_
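
The template ships only this pass-through cut. For orientation, a cut that actually restricts the phase space could look roughly like the sketch below; it assumes X is a 2D event array whose columns follow the ids ordering (the same convention cut_nocut's signature suggests), and the thresholds are placeholder values, not part of this commit.

def cut_fiducial(X, ids, isMC, xcorr_flow=False):
    """ Illustrative fiducial cut (placeholder thresholds), using only variables available in real data """
    pt  = X[:, ids.index('probe_pt')]
    eta = X[:, ids.index('probe_eta')]
    return (pt > 5.0) & (np.abs(eta) < 2.5)  # Note datatype np.bool_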

15 changes: 15 additions & 0 deletions configs/zee/filter.py
@@ -0,0 +1,15 @@
# Data filtering rules
#
# Note! Physics observable (fiducial / kinematic) cuts are defined in cuts.py, not here.
#
# [email protected], 2024

import numpy as np
import numba

from icenet.tools import stx


def filter_nofilter(X, ids, isMC, xcorr_flow=False):
""" All pass """
return np.ones(X.shape[0], dtype=np.bool_) # Note datatype np.bool_
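
The template filter passes everything through. A more selective rule could, for example, drop the sentinel values that the plotting config below also excludes (exclude_vals: [null, -999]); the sketch assumes a numeric 2D X and is purely illustrative, not part of this commit.

def filter_finite(X, ids, isMC, xcorr_flow=False):
    """ Illustrative filter: drop events with non-finite or -999 sentinel entries """
    return np.all(np.isfinite(X) & (X != -999), axis=1)  # Note datatype np.bool_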
99 changes: 99 additions & 0 deletions configs/zee/models.yml
@@ -0,0 +1,99 @@
## MVA models

# XGBoost
# https://xgboost.readthedocs.io/en/latest/parameter.html
xgb0:
train: 'xgb'
predict: 'xgb_logistic'
label: 'XGB'
raytune: xgb_trial_0

# ** Custom set of variables **
#include_MVA_vars: ['.*']
#exclude_MVA_vars: ['.*']

# booster parameters
model_param:
num_boost_round: 200 # number of epochs (equal to the number of trees!)

booster: 'gbtree' # 'gbtree' (default), 'dart' (dropout boosting)
tree_method: 'hist'
device: 'auto' # 'auto', 'cpu', 'cuda'

learning_rate: 0.1
gamma: 1.67
max_depth: 10
min_child_weight: 1.0
max_delta_step: 1
subsample: 1

colsample_bytree: 0.86
colsample_bylevel: 0.6
colsample_bynode: 0.8

reg_lambda: 1.0 # L2 regularization
reg_alpha: 0.05 # L1 regularization

# learning task parameters
objective: 'custom:binary_cross_entropy' # Note that 'multi:softprob' does not work with distillation
eval_metric: ['custom'] # for custom losses, otherwise 'logloss', 'mlogloss' ...

# BCE loss domains
BCE_param:
main:
classes: [0,1]
beta: 1.0
#set_filter: *MAIN_DOMAIN_FILTER # Comment out for 'inclusive'

plot_trees: False

# Read/Write of epochs
savemode: 'all' # 'all', 'latest'
readmode: -1 # -1 is the last saved epoch


# Deep MLP
dmlp0:
train: 'torch_generic'
predict: 'torch_vector'
label: 'DMLP'
raytune: null

# ** Custom set of variables **
#include_MVA_vars: ['.*']
#exclude_MVA_vars: ['.*']

# Model
conv_type: 'dmlp'
model_param:
mlp_dim: [32, 32] # hidden layer dimensions
activation: 'relu'
batch_norm: False
dropout: 0.01

# Optimization
opt_param:
lossfunc: 'cross_entropy' # cross_entropy, focal_entropy, logit_norm_cross_entropy
gamma: 2 # focal_entropy exponent
temperature: 1 # logit norm temperature

optimizer: 'AdamW'
clip_norm: 1.0

epochs: 150
batch_size: 256
lr: 3.0e-4
weight_decay: 0.00001 # L2-regularization

# Scheduler
scheduler_param:
step_size: 250 # Number of epochs for drop
gamma: 0.1

device: 'auto' # alternative 'cpu:0', 'cuda:0'
num_workers: 4

# Read/Write of epochs
savemode: 'all' # 'all', 'latest'
readmode: -1 # -1 is the last saved epoch
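
The xgb0 block above sets objective: 'custom:binary_cross_entropy' and eval_metric: ['custom'], but the loss code itself is not shown in this commit. As a point of reference only, a generic custom binary cross-entropy objective is typically wired into xgboost.train roughly as follows (raw-margin predictions assumed; this is a sketch, not the repository's implementation).

import numpy as np
import xgboost as xgb

def bce_objective(preds, dtrain):
    # preds are raw margins; map them to probabilities with a sigmoid
    y    = dtrain.get_label()
    p    = 1.0 / (1.0 + np.exp(-preds))
    grad = p - y            # first derivative of BCE w.r.t. the margin
    hess = p * (1.0 - p)    # second derivative of BCE w.r.t. the margin
    return grad, hess

def bce_metric(preds, dtrain):
    y   = dtrain.get_label()
    p   = 1.0 / (1.0 + np.exp(-preds))
    eps = 1e-12
    return 'BCE', float(-np.mean(y*np.log(p + eps) + (1.0 - y)*np.log(1.0 - p + eps)))

# booster = xgb.train(params, dtrain, num_boost_round=200,
#                     obj=bce_objective, custom_metric=bce_metric)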

25 changes: 25 additions & 0 deletions configs/zee/mvavars.py
@@ -0,0 +1,25 @@

KINEMATIC_VARS = [
'probe_eta',
'probe_pt',
'fixedGridRhoAll'
]

MVA_SCALAR_VARS = [
'probe_sieie',
'probe_sieip',
'probe_s4',
'probe_r9',
'probe_pfChargedIsoWorstVtx',
'probe_esEnergyOverRawE',
'probe_esEffSigmaRR',
'probe_ecalPFClusterIso',
'probe_phiWidth',
'probe_etaWidth',
'probe_trkSumPtHollowConeDR03',
'probe_trkSumPtSolidConeDR04',
#'probe_pfChargedIso', # not found in data
]

LOAD_VARS = KINEMATIC_VARS + MVA_SCALAR_VARS
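
LOAD_VARS presumably lists the branches the loader reads. A hypothetical usage sketch (file and tree names are placeholders, and this is not common.load_root_file itself):

import numpy as np
import uproot

# LOAD_VARS as defined above
events = uproot.open('input.root:ntuple').arrays(LOAD_VARS, library='np')
X = np.column_stack([events[v] for v in LOAD_VARS])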

74 changes: 74 additions & 0 deletions configs/zee/plots.yml
@@ -0,0 +1,74 @@
# Plot steering

# -----------------------------------------------------------------------

basic:
active: True
nbins: 70
percentile_range: [0.5, 99.5]
exclude_vals: [null, -999]
plot_unweighted: True

corrmat:
active: false

contours:
active: false

ROC:
active: true
num_bootstrap: 200
xmin: 1.0E-4
#set_filter: *FINAL_STATE_FILTER


## Binned ROC plots can be 1D or 2D (powerset filtering not supported here)
ROC_binned:
active: false
num_bootstrap: 200
xmin: 1.0E-4

#plot[0]:
# var: ['x_hlt_pt']
# edges: [4.0, 6, 8.0, 10.0, 12.0, 15.0, 10000]

#plot[1]:
# var: ['x_hlt_eta', 'x_hlt_pt']
# edges: [[-1.5, -1.15, -0.75, 0.0, 0.75, 1.15, 1.5],
# [4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 15.0, 10000]]

## MVA output density (1D)
MVA_output:
active: true
edges: 80
#set_filter: *FINAL_STATE_FILTER

## (MVA output x external variable) density (2D)
# A set_filter can be applied to only one plot[i] identifier!
MVA_2D:
active: false

plot[0]:
var: ['tagger_score']
edges: [{'nbin': 50, 'q': [0.0001, 0.9999], 'space': 'linear'},
{'nbin': 50, 'minmax': [0.0, 1.0], 'space': 'linear'}]
density: True

#set_filter: *FINAL_STATE_FILTER

# -----------------------------
# Powerset correlation plot parameters
xlim:
# For each class: [[lower, upper], ..., [lower, upper]]
pearson: [[-0.15, 0.30], [-0.15, 0.30]]
abs_pearson: [[0.0, 0.30], [0.0, 0.30]]
disco: [[0.0, 0.30], [0.0, 0.30]]
MI: [[0.0, 0.12], [0.0, 0.12]]
# -----------------------------

#plot[1]:
# var: ['.?hlt_pms2.?'] # RegExp supported
# edges: [{'nbin': 50, 'minmax': [0.0, 1.0], 'space': 'linear'},
# {'nbin': 50, 'q': [0.0, 0.95], 'space': 'log10'}]
# density: True
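
The basic block above requests 70 bins between the 0.5 and 99.5 percentiles, excluding null/-999 sentinels. Interpreted literally, the edge construction would go roughly like this; a sketch of the intended binning only, not the actual plotting code in icenet.

import numpy as np

def basic_hist_edges(x, nbins=70, percentile_range=(0.5, 99.5), exclude_vals=(-999,)):
    # Drop NaN/inf and the configured sentinel values before binning
    mask = np.isfinite(x)
    for v in exclude_vals:
        mask &= (x != v)
    lo, hi = np.percentile(x[mask], percentile_range)
    return np.linspace(lo, hi, nbins + 1)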

20 changes: 20 additions & 0 deletions configs/zee/raytune.yml
@@ -0,0 +1,20 @@
param:

#active: ['xgb1']
active: [null]
num_samples: 10 # Trial count parameter


setup:

xgb_trial_0:
search_algo: 'HyperOpt'

search_metric:
metric: 'AUC'
mode: 'max'

param:

num_boost_round:
type: "tune.randint(20, 300)"
