Modified PostfitPlot task
Lara813 committed Jan 21, 2025
1 parent 490fcc1 commit 13970d5
Showing 11 changed files with 287 additions and 148 deletions.
17 changes: 13 additions & 4 deletions hbw/config/defaults_and_groups.py
@@ -150,6 +150,16 @@ def set_config_defaults_and_groups(config_inst):
"vv",
"h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth",
],
"test_postfit": [
"hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1",
"hh_ggf_hbb_hww2l2nu_kl1_kt1",
"st",
"tt",
"dy",
"w_lnu",
"vv",
"h",
],
"all": ["*"],
"default": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "dy", "st", "vv", "w_lnu", "h"], # noqa: E501
"sl": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "qcd", "st", "dy", "vv", "w_lnu", "h"], # noqa: E501
@@ -303,6 +313,7 @@ def set_config_defaults_and_groups(config_inst):
"sr__2mu__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2mu__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
"sr__2e__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2e__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
"sr__emu__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__emu__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
"sr__1b", "sr__2b",
),
"vbfSR_dl": (
"sr__1b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", "sr__2b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1",
@@ -375,10 +386,8 @@ def set_config_defaults_and_groups(config_inst):
for proc, _, _ in config_inst.walk_processes() if proc.has_tag("is_signal")
},
"dilep": {
"hh_ggf_hbb_hvv2l2nu_kl0_kt1": {"scale": 10000, "unstack": True},
"hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
"hh_ggf_hbb_hvv2l2nu_kl2p45_kt1": {"scale": 10000, "unstack": True},
"hh_ggf_hbb_hvv2l2nu_kl5_kt1": {"scale": 10000, "unstack": True},
"hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1": {"scale": 90000, "unstack": True},
"hh_ggf_hbb_hww2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
},
"dileptest": {
"hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
2 changes: 1 addition & 1 deletion hbw/inference/base.py
@@ -89,7 +89,7 @@ def config_variable(self: InferenceModel, config_cat_inst: od.Config):
dnn_proc = dnn_cat.replace("ml_", "")
return f"mlscore.{dnn_proc}"
else:
return "mli_mbb"
return "mli_lep_pt"

def customize_category(self: InferenceModel, cat_inst: DotDict, config_cat_inst: od.Config):
""" Function to allow customizing the inference category """
54 changes: 51 additions & 3 deletions hbw/inference/dl.py
@@ -240,7 +240,7 @@
})


dl.derive("dl_ml_study_1", cls_dict={
dl_ml_study_1 = dl.derive("dl_ml_study_1", cls_dict={
"ml_model_name": "dl_22post_ml_study_1",
"config_categories": [
"sr__1b__ml_signal_ggf",
@@ -283,7 +283,11 @@
"systematics": rate_systematics,
})

dl.derive("dl_ml_study_3", cls_dict={
dl_ml_study_1.derive("dl_ml_study_1_handle", cls_dict={
"ml_model_name": "dl_22post_ml_study_1_handle",
})

dl_ml_study_3 = dl.derive("dl_ml_study_3", cls_dict={
"ml_model_name": "dl_22_procs1_w0",
"config_categories": [
"sr__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
@@ -325,7 +329,11 @@
"systematics": rate_systematics,
})

dl.derive("dl_ml_study_2", cls_dict={
dl_ml_study_3.derive("dl_ml_study_3_handle", cls_dict={
"ml_model_name": "dl_22_procs1_w0_handle",
})

dl_ml_study_2 = dl.derive("dl_ml_study_2", cls_dict={
"ml_model_name": "dl_22post_ml_study_2",
"config_categories": [
"sr__1b__ml_signal_ggf2",
@@ -367,6 +375,14 @@
"systematics": rate_systematics,
})

dl_ml_study_2.derive("dl_ml_study_2_handle", cls_dict={
"ml_model_name": "dl_22post_ml_study_2_handle",
})

dl_ml_study_2.derive("dl_ml_study_2_ignore", cls_dict={
"ml_model_name": "dl_22post_ml_study_2",
})

dl.derive("dl_hww_and_hzz", cls_dict={
"processes": [
"hh_ggf_hbb_hww_kl0_kt1",
@@ -531,3 +547,35 @@
"systematics": rate_systematics},
)
dl.derive("dl_rates_only", cls_dict={"systematics": rate_systematics})

dl.derive("dl_postfit_test", cls_dict={
"ml_model_name": None,
"config_categories": [
"sr__1b",
"sr__2b",
],
"processes": [
# "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2",
"hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4",
"hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3",
"hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2",
"hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96",
"hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1",
"hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1",
"hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43",
"hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94",
"hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36",
"hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39",
"hh_ggf_hbb_hww2l2nu_kl0_kt1",
"hh_ggf_hbb_hww2l2nu_kl1_kt1",
"hh_ggf_hbb_hww2l2nu_kl2p45_kt1",
"hh_ggf_hbb_hww2l2nu_kl5_kt1",
"st",
"tt",
"dy",
"w_lnu",
"vv",
"h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth",
],
"systematics": rate_systematics,
})
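
Note on the pattern used throughout this file: the commit switches from bare dl.derive(...) calls to capturing the returned class (e.g. dl_ml_study_1 = dl.derive(...)), which is what allows the new chained derivations such as dl_ml_study_1.derive("dl_ml_study_1_handle", ...). A minimal sketch of that derive mechanism, assuming the usual derivable-class behaviour (illustrative only, not the actual columnflow implementation):

    # illustrative sketch: derive() builds a subclass carrying the given class attributes
    class Derivable:
        @classmethod
        def derive(cls, cls_name, cls_dict=None):
            # the returned object is a new class; keeping a reference to it
            # is what makes further .derive(...) calls on it possible
            return type(cls_name, (cls,), dict(cls_dict or {}))


    class dl(Derivable):
        ml_model_name = None
        systematics = []


    dl_ml_study_1 = dl.derive("dl_ml_study_1", cls_dict={"ml_model_name": "dl_22post_ml_study_1"})
    dl_ml_study_1_handle = dl_ml_study_1.derive(
        "dl_ml_study_1_handle",
        cls_dict={"ml_model_name": "dl_22post_ml_study_1_handle"},
    )

    assert issubclass(dl_ml_study_1_handle, dl_ml_study_1)
    assert dl_ml_study_1_handle.ml_model_name == "dl_22post_ml_study_1_handle"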
46 changes: 40 additions & 6 deletions hbw/ml/data_loader.py
@@ -141,9 +141,11 @@ def __init__(
"""
self._ml_model_inst = ml_model_inst
self._process = process
self._skip_mask = skip_mask

proc_mask, _ = get_proc_mask(events, process, ml_model_inst.config_inst)
self._stats = stats
# __import__("IPython").embed()
# del_sub_proc_stats(process, sub_id)
if not skip_mask:
self._events = events[proc_mask]
@@ -177,6 +179,10 @@ def parameters(self):
}
return self._parameters

@property
def skip_mask(self):
return self._skip_mask

@property
def ml_model_inst(self):
return self._ml_model_inst
@@ -255,6 +261,14 @@ def shuffle_indices(self) -> np.ndarray:
self._shuffle_indices = np.random.permutation(self.n_events)
return self._shuffle_indices

@property
def num_event_per_process(self) -> str:
if not self.skip_mask:
self._num_events_per_process = "num_events_pos_weights_per_process"
else:
self._num_events_per_process = "num_events_per_process"
return self._num_events_per_process

def get_xsec_train_weights(self) -> np.ndarray:
"""
Weighting such that each event has roughly the same weight,
@@ -267,10 +281,20 @@ def get_xsec_train_weights(self) -> np.ndarray:
raise Exception("cannot determine train weights without stats")

_, sub_id = get_proc_mask(self._events, self.process, self.ml_model_inst.config_inst)
sum_abs_weights = np.sum([self.stats[self.process]["sum_abs_weights_per_process"][str(id)] for id in sub_id])
num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id])
sum_weights = np.sum([self.stats[self.process]["sum_pos_weights_per_process"][str(id)] for id in sub_id])
num_events = np.sum(
[self.stats[self.process][self.num_event_per_process][str(id)] for id in sub_id],
)
# if not self.skip_mask:
# num_events = np.sum(
# [self.stats[self.process]["num_events_pos_weights_per_process"][str(id)] for id in sub_id],
# )
# else:
# num_events = np.sum(
# [self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id],
# )

xsec_train_weights = self.weights / sum_abs_weights * num_events
xsec_train_weights = self.weights / sum_weights * num_events

return xsec_train_weights

@@ -286,7 +310,15 @@ def get_equal_train_weights(self) -> np.ndarray:

combined_proc_inst = self.ml_model_inst.config_inst.get_process(self.process)
_, sub_id_proc = get_proc_mask(self._events, self.process, self.ml_model_inst.config_inst)
num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id_proc])
num_events = np.sum(
[self.stats[self.process][self.num_event_per_process][str(id)] for id in sub_id_proc],
)
# if not self.skip_mask:
# num_events = np.sum(
# [self.stats[self.process]["num_events_pos_weights_per_process"][str(id)] for id in sub_id_proc],
# )
# else:
# num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id_proc])
targeted_sum_of_weights_per_process = (
num_events / len(combined_proc_inst.x.ml_config.sub_processes)
)
@@ -724,8 +756,10 @@ def prediction(self) -> np.ndarray:
self._prediction = self.load_data("prediction")
else:
# calculate prediction if needed
if not hasattr(self._ml_model_inst, "trained_model"):
if not hasattr(self._ml_model_inst, "best_model"):
# if not hasattr(self._ml_model_inst, "trained_model"):
raise Exception("No trained model found in the MLModel instance. Cannot calculate prediction.")
self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features)
# self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features)
self._prediction = predict_numpy_on_batch(self._ml_model_inst.best_model, self.features)

return self._prediction # TODO ML best model
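
A compact sketch of the cross-section training-weight logic after this change. The stats key names mirror the diff above, and the choice of event-count key is read off the new num_event_per_process property; the function itself is only an illustration, not the actual data loader code:

    import numpy as np

    def xsec_train_weights(weights, stats, process, sub_id, skip_mask):
        # sum of positive event weights over all sub-process ids of this process
        sum_weights = np.sum(
            [stats[process]["sum_pos_weights_per_process"][str(i)] for i in sub_id],
        )
        # the event-count key depends on whether the process mask was skipped,
        # matching the new num_event_per_process property
        key = "num_events_per_process" if skip_mask else "num_events_pos_weights_per_process"
        num_events = np.sum([stats[process][key][str(i)] for i in sub_id])
        # rescale so that the weights of this process sum to (roughly) its event count
        return weights / sum_weights * num_events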
21 changes: 19 additions & 2 deletions hbw/ml/derived/dl.py
@@ -293,7 +293,7 @@ def setup(self):
"hh_vbf_hbb_hvv2l2nu_kvm1p6_k2v2p72_klm1p36",
"hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39",
],
"weighting": "xsec",
"weighting": "equal",
},
},
"processes": [
@@ -345,6 +345,7 @@ def setup(self):

dl_22post_ml_study_1 = dl_22post.derive("dl_22post_ml_study_1", cls_dict={
"training_configs": lambda self, requested_configs: ["c22post"],
"negative_weights": "ignore",
"combine_processes": {
"signal_ggf": {
# "name": "tt_and_st",
@@ -381,8 +382,13 @@ def setup(self):
],
})

dl_22post_ml_study_1_handle = dl_22post_ml_study_1.derive("dl_22post_ml_study_1_handle", cls_dict={
"negative_weights": "handle",
})

dl_22post_ml_study_2 = dl_22post.derive("dl_22post_ml_study_2", cls_dict={
"training_configs": lambda self, requested_configs: ["c22post"],
"negative_weights": "ignore",
"combine_processes": {
"signal_ggf2": {
# "name": "tt_and_st",
@@ -407,7 +413,7 @@
"hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39",

],
"weighting": "xsec",
"weighting": "equal",
},
},
"processes": [
@@ -419,6 +425,11 @@ def setup(self):
"h",
],
})

dl_22post_ml_study_2_handle = dl_22post_ml_study_2.derive("dl_22post_ml_study_2_handle", cls_dict={
"negative_weights": "handle",
})

#
# setups with different processes (0: baseline, 1: add SM vbf + single H, 2: add SL+all HH variations)
# NOTE: we should decide which signal processes exactly to use:
@@ -435,6 +446,7 @@ def setup(self):
})
dl_22_procs1_w0 = dl_22_procs1.derive("dl_22_procs1_w0", cls_dict={
"training_configs": lambda self, requested_configs: ["c22post"],
"negative_weights": "ignore",
"ml_process_weights": {
"hh_ggf_hbb_hvv2l2nu_kl1_kt1": 1,
"hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1": 1,
Expand All @@ -444,6 +456,11 @@ def setup(self):
"h": 1,
},
})

dl_22_procs1_w0_handle = dl_22_procs1_w0.derive("dl_22_procs1_w0_handle", cls_dict={
"negative_weights": "handle",
})

dl_22_procs1_w1 = dl_22_procs1.derive("dl_22_procs1_w1", cls_dict={
"ml_process_weights": {
"hh_ggf_hbb_hvv2l2nu_kl1_kt1": 1,
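
The new *_handle derivations in this file differ from their parents only in the negative_weights setting ("ignore" vs. "handle"). The actual semantics live in the hbw ML base model; the following is a purely hypothetical reading of the two modes, included only for orientation:

    import numpy as np

    def apply_negative_weight_mode(weights, mode):
        # hypothetical illustration only; the real behaviour is defined by the ML base model
        if mode == "ignore":
            # negative-weight events contribute nothing to the training
            return np.where(weights > 0, weights, 0.0)
        if mode == "handle":
            # negative-weight events are kept but treated specially, e.g. via |w|
            return np.abs(weights)
        raise ValueError(f"unknown negative_weights mode: {mode}")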
4 changes: 2 additions & 2 deletions hbw/ml/mixins.py
@@ -55,7 +55,7 @@ def prepare_ml_model(
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization
# from hbw.ml.tf_util import cumulated_crossentropy, categorical_crossentropy
from hbw.ml.tf_util import cumulated_crossentropy # , categorical_crossentropy

n_inputs = len(set(self.input_features))
n_outputs = len(self.processes)
@@ -110,7 +110,7 @@ def prepare_ml_model(
model.compile(
# NOTE: we'd preferably use the Keras CCE, but it does not work when assigning one event
# to multiple classes (target with multiple entries != 0)
loss="cumulated_crossentropy",
loss=cumulated_crossentropy,
optimizer=optimizer,
metrics=["categorical_accuracy"],
weighted_metrics=["categorical_accuracy"],
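
The loss change above matters because Keras only resolves string loss names for built-in, registered losses; a custom loss such as cumulated_crossentropy has to be passed as a callable (or registered explicitly). A self-contained illustration with a stand-in loss function (custom_loss is a placeholder, not the hbw implementation):

    import tensorflow as tf
    from tensorflow import keras

    def custom_loss(y_true, y_pred):
        # plain categorical cross entropy written out by hand
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0)
        return -tf.reduce_sum(y_true * tf.math.log(y_pred), axis=-1)

    model = keras.Sequential([
        keras.layers.Input(shape=(8,)),
        keras.layers.Dense(4, activation="softmax"),
    ])
    # passing the callable works; loss="custom_loss" would raise, since the
    # string is not a registered Keras identifier
    model.compile(loss=custom_loss, optimizer="adam", metrics=["categorical_accuracy"])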
9 changes: 5 additions & 4 deletions hbw/ml/stats.py
@@ -76,15 +76,16 @@ def ml_preparation(
events = set_ak_column_f32(events, "event_weight", weight)
stats["sum_weights"] += float(ak.sum(weight, axis=0))
weight_map["sum_weights"] = weight
weight_map["sum_abs_weights"] = (weight, weight > 0)
weight_map["sum_pos_weights"] = np.abs(weight)
weight_map["sum_pos_weights"] = (weight, weight > 0)
weight_map["sum_abs_weights"] = np.abs(weight)
weight_map["num_events_pos_weights"] = weight > 0

# normalization weight only
norm_weight = events["stitched_normalization_weight"]
stats["sum_norm_weights"] += float(ak.sum(norm_weight, axis=0))
weight_map["sum_norm_weights"] = norm_weight
weight_map["sum_abs_norm_weights"] = (norm_weight, norm_weight > 0)
weight_map["sum_pos_norm_weights"] = np.abs(norm_weight)
weight_map["sum_pos_norm_weights"] = (norm_weight, norm_weight > 0)
weight_map["sum_abs_norm_weights"] = np.abs(norm_weight)

group_map = {
"process": {
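
The weight_map entries above follow the convention visible in the diff: a plain array is summed as-is, a (values, mask) tuple is summed only where the mask holds, and a boolean array acts as an event counter. A small sketch of such a reduction (an assumed consumer, not the actual columnflow stats machinery):

    import awkward as ak

    def reduce_weight_map(weight_map, stats):
        for name, entry in weight_map.items():
            if isinstance(entry, tuple):
                values, mask = entry
                # e.g. sum_pos_weights: sum of weights restricted to weight > 0
                contribution = ak.sum(values[mask])
            else:
                # arrays are summed directly; boolean arrays count events
                contribution = ak.sum(entry)
            stats[name] = stats.get(name, 0.0) + float(contribution)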
3 changes: 2 additions & 1 deletion hbw/plotting/plot_fits.py
@@ -52,7 +52,8 @@ def scalable_exponnorm(x, A, loc, scale, K=1):


def plot_fit(
hists: OrderedDict[od.Process, hist.Hist],
hists: dict[str, OrderedDict[od.Process, hist.Hist]],
# hists: OrderedDict[od.Process, hist.Hist],
config_inst: od.Config,
category_inst: od.Category,
variable_insts: list[od.Variable],
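
The new plot_fit signature wraps the per-process histograms in an outer dict keyed by a string; given the commit message, that outer key is presumably the fit stage (for example "prefit" vs. "postfit"), though the exact naming is an assumption. Iteration then becomes two nested loops:

    def iter_fit_hists(hists):
        # hists: {fit_label: OrderedDict[od.Process -> hist.Hist]}
        for fit_label, proc_hists in hists.items():
            for process, h in proc_hists.items():
                # one histogram per process per fit stage
                print(fit_label, getattr(process, "name", process), h)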