float64 precision update; template for icezee
mieskolainen committed May 5, 2024
1 parent 70f192c commit 408a4ac
Showing 18 changed files with 671 additions and 20 deletions.
1 change: 1 addition & 0 deletions analysis/_icepaths_.py
@@ -10,6 +10,7 @@
'/iceid/',
'/icefit/',
'/icebrem/',
'/icezee/'
]

for p in paths:
19 changes: 19 additions & 0 deletions analysis/zee.py
@@ -0,0 +1,19 @@
# Zee steering code
#
# [email protected], 2024

import sys
sys.path.append(".")

# Configure plotting backend
import matplotlib
matplotlib.use('Agg')

from icenet.tools import process
from icezee import common

def main():
args = process.generic_flow(rootname='zee', func_loader=common.load_root_file, func_factor=common.splitfactor)

if __name__ == '__main__' :
main()
3 changes: 3 additions & 0 deletions configs/zee/__init__.py
@@ -0,0 +1,3 @@
#
#
#
15 changes: 15 additions & 0 deletions configs/zee/cuts.py
@@ -0,0 +1,15 @@
# Basic kinematic fiducial cuts; use only variables available in real data.
#
# [email protected], 2024

import numpy as np
import numba
import matplotlib.pyplot as plt

from icenet.tools import stx


def cut_nocut(X, ids, isMC, xcorr_flow=False):
""" No cuts """
return np.ones(X.shape[0], dtype=np.bool_) # Note datatype np.bool_
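
The template ships only this pass-through cut. For orientation, a cut that actually restricts the phase space could look roughly like the sketch below; it assumes X is a 2D event array whose columns follow the ids ordering (the same convention cut_nocut's signature suggests), and the thresholds are placeholder values, not part of this commit.

def cut_fiducial(X, ids, isMC, xcorr_flow=False):
    """ Illustrative fiducial cut (placeholder thresholds), using only variables available in real data """
    pt  = X[:, ids.index('probe_pt')]
    eta = X[:, ids.index('probe_eta')]
    return (pt > 5.0) & (np.abs(eta) < 2.5)  # Note datatype np.bool_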

15 changes: 15 additions & 0 deletions configs/zee/filter.py
@@ -0,0 +1,15 @@
# Data filtering rules
#
# Note! Physics observable (fiducial / kinematic) cuts are defined in cuts.py, not here.
#
# [email protected], 2024

import numpy as np
import numba

from icenet.tools import stx


def filter_nofilter(X, ids, isMC, xcorr_flow=False):
""" All pass """
return np.ones(X.shape[0], dtype=np.bool_) # Note datatype np.bool_
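
The template filter passes everything through. A more selective rule could, for example, drop the sentinel values that the plotting config below also excludes (exclude_vals: [null, -999]); the sketch assumes a numeric 2D X and is purely illustrative, not part of this commit.

def filter_finite(X, ids, isMC, xcorr_flow=False):
    """ Illustrative filter: drop events with non-finite or -999 sentinel entries """
    return np.all(np.isfinite(X) & (X != -999), axis=1)  # Note datatype np.bool_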
99 changes: 99 additions & 0 deletions configs/zee/models.yml
@@ -0,0 +1,99 @@
## MVA models

# XGBoost
# https://xgboost.readthedocs.io/en/latest/parameter.html
xgb0:
train: 'xgb'
predict: 'xgb_logistic'
label: 'XGB'
raytune: xgb_trial_0

# ** Custom set of variables **
#include_MVA_vars: ['.*']
#exclude_MVA_vars: ['.*']

# booster parameters
model_param:
num_boost_round: 200 # number of epochs (equal to the number of trees!)

booster: 'gbtree' # 'gbtree' (default), 'dart' (dropout boosting)
tree_method: 'hist'
device: 'auto' # 'auto', 'cpu', 'cuda'

learning_rate: 0.1
gamma: 1.67
max_depth: 10
min_child_weight: 1.0
max_delta_step: 1
subsample: 1

colsample_bytree: 0.86
colsample_bylevel: 0.6
colsample_bynode: 0.8

reg_lambda: 1.0 # L2 regularization
reg_alpha: 0.05 # L1 regularization

# learning task parameters
objective: 'custom:binary_cross_entropy' # Note that 'multi:softprob' does not work with distillation
eval_metric: ['custom'] # for custom losses, otherwise 'logloss', 'mlogloss' ...

# BCE loss domains
BCE_param:
main:
classes: [0,1]
beta: 1.0
#set_filter: *MAIN_DOMAIN_FILTER # Comment out for 'inclusive'

plot_trees: False

# Read/Write of epochs
savemode: 'all' # 'all', 'latest'
readmode: -1 # -1 is the last saved epoch


# Deep MLP
dmlp0:
train: 'torch_generic'
predict: 'torch_vector'
label: 'DMLP'
raytune: null

# ** Custom set of variables **
#include_MVA_vars: ['.*']
#exclude_MVA_vars: ['.*']

# Model
conv_type: 'dmlp'
model_param:
mlp_dim: [32, 32] # hidden layer dimensions
activation: 'relu'
batch_norm: False
dropout: 0.01

# Optimization
opt_param:
lossfunc: 'cross_entropy' # cross_entropy, focal_entropy, logit_norm_cross_entropy
gamma: 2 # focal_entropy exponent
temperature: 1 # logit norm temperature

optimizer: 'AdamW'
clip_norm: 1.0

epochs: 150
batch_size: 256
lr: 3.0e-4
weight_decay: 0.00001 # L2-regularization

# Scheduler
scheduler_param:
step_size: 250 # Number of epochs for drop
gamma: 0.1

device: 'auto' # alternative 'cpu:0', 'cuda:0'
num_workers: 4

# Read/Write of epochs
savemode: 'all' # 'all', 'latest'
readmode: -1 # -1 is the last saved epoch
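
The xgb0 block above sets objective: 'custom:binary_cross_entropy' and eval_metric: ['custom'], but the loss code itself is not shown in this commit. As a point of reference only, a generic custom binary cross-entropy objective is typically wired into xgboost.train roughly as follows (raw-margin predictions assumed; this is a sketch, not the repository's implementation).

import numpy as np
import xgboost as xgb

def bce_objective(preds, dtrain):
    # preds are raw margins; map them to probabilities with a sigmoid
    y    = dtrain.get_label()
    p    = 1.0 / (1.0 + np.exp(-preds))
    grad = p - y            # first derivative of BCE w.r.t. the margin
    hess = p * (1.0 - p)    # second derivative of BCE w.r.t. the margin
    return grad, hess

def bce_metric(preds, dtrain):
    y   = dtrain.get_label()
    p   = 1.0 / (1.0 + np.exp(-preds))
    eps = 1e-12
    return 'BCE', float(-np.mean(y*np.log(p + eps) + (1.0 - y)*np.log(1.0 - p + eps)))

# booster = xgb.train(params, dtrain, num_boost_round=200,
#                     obj=bce_objective, custom_metric=bce_metric)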

25 changes: 25 additions & 0 deletions configs/zee/mvavars.py
@@ -0,0 +1,25 @@

KINEMATIC_VARS = [
'probe_eta',
'probe_pt',
'fixedGridRhoAll'
]

MVA_SCALAR_VARS = [
'probe_sieie',
'probe_sieip',
'probe_s4',
'probe_r9',
'probe_pfChargedIsoWorstVtx',
'probe_esEnergyOverRawE',
'probe_esEffSigmaRR',
'probe_ecalPFClusterIso',
'probe_phiWidth',
'probe_etaWidth',
'probe_trkSumPtHollowConeDR03',
'probe_trkSumPtSolidConeDR04',
#'probe_pfChargedIso', # not found in data
]

LOAD_VARS = KINEMATIC_VARS + MVA_SCALAR_VARS
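
LOAD_VARS presumably lists the branches the loader reads. A hypothetical usage sketch (file and tree names are placeholders, and this is not common.load_root_file itself):

import numpy as np
import uproot

# LOAD_VARS as defined above
events = uproot.open('input.root:ntuple').arrays(LOAD_VARS, library='np')
X = np.column_stack([events[v] for v in LOAD_VARS])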

74 changes: 74 additions & 0 deletions configs/zee/plots.yml
@@ -0,0 +1,74 @@
# Plot steering

# -----------------------------------------------------------------------

basic:
active: True
nbins: 70
percentile_range: [0.5, 99.5]
exclude_vals: [null, -999]
plot_unweighted: True

corrmat:
active: false

contours:
active: false

ROC:
active: true
num_bootstrap: 200
xmin: 1.0E-4
#set_filter: *FINAL_STATE_FILTER


## Binned ROC plots can be 1D or 2D (powerset filtering not supported here)
ROC_binned:
active: false
num_bootstrap: 200
xmin: 1.0E-4

#plot[0]:
# var: ['x_hlt_pt']
# edges: [4.0, 6, 8.0, 10.0, 12.0, 15.0, 10000]

#plot[1]:
# var: ['x_hlt_eta', 'x_hlt_pt']
# edges: [[-1.5, -1.15, -0.75, 0.0, 0.75, 1.15, 1.5],
# [4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 15.0, 10000]]

## MVA output density (1D)
MVA_output:
active: true
edges: 80
#set_filter: *FINAL_STATE_FILTER

## (MVA output x external variable) density (2D)
# A set_filter can be applied to only one plot[i] identifier!
MVA_2D:
active: false

plot[0]:
var: ['tagger_score']
edges: [{'nbin': 50, 'q': [0.0001, 0.9999], 'space': 'linear'},
{'nbin': 50, 'minmax': [0.0, 1.0], 'space': 'linear'}]
density: True

#set_filter: *FINAL_STATE_FILTER

# -----------------------------
# Powerset correlation plot parameters
xlim:
# For each class: [[lower, upper], ..., [lower, upper]]
pearson: [[-0.15, 0.30], [-0.15, 0.30]]
abs_pearson: [[0.0, 0.30], [0.0, 0.30]]
disco: [[0.0, 0.30], [0.0, 0.30]]
MI: [[0.0, 0.12], [0.0, 0.12]]
# -----------------------------

#plot[1]:
# var: ['.?hlt_pms2.?'] # RegExp supported
# edges: [{'nbin': 50, 'minmax': [0.0, 1.0], 'space': 'linear'},
# {'nbin': 50, 'q': [0.0, 0.95], 'space': 'log10'}]
# density: True
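
The basic block above requests 70 bins between the 0.5 and 99.5 percentiles, excluding null/-999 sentinels. Interpreted literally, the edge construction would go roughly like this; a sketch of the intended binning only, not the actual plotting code in icenet.

import numpy as np

def basic_hist_edges(x, nbins=70, percentile_range=(0.5, 99.5), exclude_vals=(-999,)):
    # Drop NaN/inf and the configured sentinel values before binning
    mask = np.isfinite(x)
    for v in exclude_vals:
        mask &= (x != v)
    lo, hi = np.percentile(x[mask], percentile_range)
    return np.linspace(lo, hi, nbins + 1)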

20 changes: 20 additions & 0 deletions configs/zee/raytune.yml
@@ -0,0 +1,20 @@
param:

#active: ['xgb1']
active: [null]
num_samples: 10 # Trial count parameter


setup:

xgb_trial_0:
search_algo: 'HyperOpt'

search_metric:
metric: 'AUC'
mode: 'max'

param:

num_boost_round:
type: "tune.randint(20, 300)"
