diff --git a/hbw/categorization/categories.py b/hbw/categorization/categories.py index bfde28f3..24857d7c 100644 --- a/hbw/categorization/categories.py +++ b/hbw/categorization/categories.py @@ -274,6 +274,7 @@ def catid_2b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, a # TODO: not hard-coded -> use config? ml_processes = [ "signal_ggf", "signal_ggf2", "signal_vbf", "signal_vbf2", + "signal_ggf4", "signal_ggf5", "signal_vbf4", "signal_vbf5", "hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "hh_ggf_hbb_hvvqqlnu_kl1_kt1", "hh_vbf_hbb_hvvqqlnu_kv1_k2v1_kl1", "hh_ggf_hbb_hvv2l2nu_kl1_kt1", "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index 5c9c9ce2..392b4515 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -133,6 +133,38 @@ def set_config_defaults_and_groups(config_inst): # process groups for conveniently looping over certain processs # (used in wrapper_factory and during plotting) config_inst.x.process_groups = { + "ml_study": [ + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", + "tt", + "dy_m4to10", "dy_m10to50", "dy_m50toinf", + "w_lnu", + "vv", + "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth", + ], + "test_postfit": [ + # "hh_vbf_hbb_hww2l2nu", + "hh_ggf_hbb_hww2l2nu", + "st", + "tt", + "dy", + "w_lnu", + "vv", + "h", + ], "all": ["*"], "default": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "dy", "st", "vv", "w_lnu", "h"], # noqa: E501 "sl": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "qcd", "st", "dy", "vv", "w_lnu", "h"], # noqa: E501 @@ -282,10 +314,15 @@ def set_config_defaults_and_groups(config_inst): ), # Dilepton "SR_dl": ( + "sr__1b__ml_signal_ggf", "sr__1b__ml_signal_ggf2", "sr__2b__ml_signal_ggf", "sr__2b__ml_signal_ggf2", + "sr__1b__ml_signal_vbf", "sr__1b__ml_signal_vbf2", "sr__2b__ml_signal_vbf", "sr__2b__ml_signal_vbf2", + "sr__1b__ml_signal_ggf4", "sr__1b__ml_signal_ggf5", "sr__2b__ml_signal_ggf4", "sr__2b__ml_signal_ggf5", + "sr__1b__ml_signal_vbf4", "sr__1b__ml_signal_vbf5", "sr__2b__ml_signal_vbf4", "sr__2b__ml_signal_vbf5", "sr__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2mu__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2mu__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2e__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2e__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__emu__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__emu__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", + "sr__1b", "sr__2b", ), "vbfSR_dl": ( "sr__1b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", "sr__2b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", @@ -372,10 +409,8 @@ def set_config_defaults_and_groups(config_inst): for proc, _, _ in config_inst.walk_processes() if proc.has_tag("is_signal") }, "dilep": { - "hh_ggf_hbb_hvv2l2nu_kl0_kt1": {"scale": 10000, "unstack": True}, - "hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True}, - "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1": 
{"scale": 10000, "unstack": True}, - "hh_ggf_hbb_hvv2l2nu_kl5_kt1": {"scale": 10000, "unstack": True}, + "hh_vbf_hbb_hww2l2nu": {"scale": 90000, "unstack": True}, + "hh_ggf_hbb_hww2l2nu": {"scale": 10000, "unstack": True}, }, "dileptest": { "hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True}, @@ -478,4 +513,5 @@ def set_config_defaults_and_groups(config_inst): "vbfSR_dl_resolved": is_signal_sm, "vbfSR_dl_boosted": is_signal_sm, "BR_dl": is_background, + } diff --git a/hbw/inference/base.py b/hbw/inference/base.py index 5ee3d013..22f1c17b 100644 --- a/hbw/inference/base.py +++ b/hbw/inference/base.py @@ -89,7 +89,7 @@ def config_variable(self: InferenceModel, config_cat_inst: od.Config): dnn_proc = dnn_cat.replace("ml_", "") return f"mlscore.{dnn_proc}" else: - return "mli_mbb" + return "mli_lep_pt" def customize_category(self: InferenceModel, cat_inst: DotDict, config_cat_inst: od.Config): """ Function to allow customizing the inference category """ diff --git a/hbw/inference/dl.py b/hbw/inference/dl.py index 9bde981f..5a4f42fc 100644 --- a/hbw/inference/dl.py +++ b/hbw/inference/dl.py @@ -153,8 +153,94 @@ # "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1", # "hh_ggf_hbb_hvv2l2nu_kl5_kt1", +dl.derive("dl_ml_study_5", cls_dict={ + "ml_model_name": "dl_22post_ml_study_5", + "config_categories": [ + "sr__1b__ml_signal_ggf5", + "sr__1b__ml_signal_vbf5", + "sr__1b__ml_tt", + "sr__1b__ml_st", + "sr__1b__ml_dy", + "sr__1b__ml_h", + "sr__2b__ml_signal_ggf5", + "sr__2b__ml_signal_vbf5", + "sr__2b__ml_tt", + "sr__2b__ml_st", + "sr__2b__ml_dy", + "sr__2b__ml_h", + ], + "processes": [ + # qqHH_CV_m0p012_C2V_0p03_kl_10p2 + # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", + "tt", + "dy", + "w_lnu", + "vv", + "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth", + ], + "systematics": rate_systematics, +}) -dl.derive("dl_ml_study_1", cls_dict={ +dl.derive("dl_ml_study_4", cls_dict={ + "ml_model_name": "dl_22post_ml_study_4", + "config_categories": [ + "sr__1b__ml_signal_ggf4", + "sr__1b__ml_signal_vbf4", + "sr__1b__ml_tt", + "sr__1b__ml_st", + "sr__1b__ml_dy", + "sr__1b__ml_h", + "sr__2b__ml_signal_ggf4", + "sr__2b__ml_signal_vbf4", + "sr__2b__ml_tt", + "sr__2b__ml_st", + "sr__2b__ml_dy", + "sr__2b__ml_h", + ], + "processes": [ + # qqHH_CV_m0p012_C2V_0p03_kl_10p2 + # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", + 
"tt", + "dy", + "w_lnu", + "vv", + "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth", + ], + "systematics": rate_systematics, +}) + + +dl_ml_study_1 = dl.derive("dl_ml_study_1", cls_dict={ "ml_model_name": "dl_22post_ml_study_1", "config_categories": [ "sr__1b__ml_signal_ggf", @@ -171,16 +257,69 @@ "sr__2b__ml_h", ], "processes": [ - "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", - "hh_vbf_hbb_hvv2l2nu_kv1_k2v0_kl1", - "hh_vbf_hbb_hvv2l2nu_kvm0p962_k2v0p959_klm1p43", - "hh_vbf_hbb_hvv2l2nu_kvm1p21_k2v1p94_klm0p94", - "hh_vbf_hbb_hvv2l2nu_kvm1p6_k2v2p72_klm1p36", - "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39", - "hh_ggf_hbb_hvv2l2nu_kl0_kt1", - "hh_ggf_hbb_hvv2l2nu_kl1_kt1", - "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1", - "hh_ggf_hbb_hvv2l2nu_kl5_kt1", + # qqHH_CV_m0p012_C2V_0p03_kl_10p2 + # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", + "tt", + "dy", + "w_lnu", + "vv", + "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth", + ], + "systematics": rate_systematics, +}) + +dl_ml_study_1.derive("dl_ml_study_1_handle", cls_dict={ + "ml_model_name": "dl_22post_ml_study_1_handle", +}) + +dl_ml_study_3 = dl.derive("dl_ml_study_3", cls_dict={ + "ml_model_name": "dl_22_procs1_w0", + "config_categories": [ + "sr__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", + "sr__1b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", + "sr__1b__ml_tt", + "sr__1b__ml_st", + "sr__1b__ml_dy", + "sr__1b__ml_h", + "sr__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", + "sr__2b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", + "sr__2b__ml_tt", + "sr__2b__ml_st", + "sr__2b__ml_dy", + "sr__2b__ml_h", + ], + "processes": [ + # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", "tt", "dy", "w_lnu", @@ -190,7 +329,11 @@ "systematics": rate_systematics, }) -dl.derive("dl_ml_study_2", cls_dict={ +dl_ml_study_3.derive("dl_ml_study_3_handle", cls_dict={ + "ml_model_name": "dl_22_procs1_w0_handle", +}) + +dl_ml_study_2 = dl.derive("dl_ml_study_2", cls_dict={ "ml_model_name": "dl_22post_ml_study_2", "config_categories": [ "sr__1b__ml_signal_ggf2", @@ -207,16 +350,22 @@ "sr__2b__ml_h", ], "processes": [ - "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", - "hh_vbf_hbb_hvv2l2nu_kv1_k2v0_kl1", - "hh_vbf_hbb_hvv2l2nu_kvm0p962_k2v0p959_klm1p43", - "hh_vbf_hbb_hvv2l2nu_kvm1p21_k2v1p94_klm0p94", - "hh_vbf_hbb_hvv2l2nu_kvm1p6_k2v2p72_klm1p36", - "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39", - "hh_ggf_hbb_hvv2l2nu_kl0_kt1", - 
"hh_ggf_hbb_hvv2l2nu_kl1_kt1", - "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1", - "hh_ggf_hbb_hvv2l2nu_kl5_kt1", + # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", "tt", "dy", "w_lnu", @@ -226,6 +375,14 @@ "systematics": rate_systematics, }) +dl_ml_study_2.derive("dl_ml_study_2_handle", cls_dict={ + "ml_model_name": "dl_22post_ml_study_2_handle", +}) + +dl_ml_study_2.derive("dl_ml_study_2_ignore", cls_dict={ + "ml_model_name": "dl_22post_ml_study_2", +}) + dl.derive("dl_hww_and_hzz", cls_dict={ "processes": [ "hh_ggf_hbb_hww_kl0_kt1", @@ -390,3 +547,35 @@ "systematics": rate_systematics}, ) dl.derive("dl_rates_only", cls_dict={"systematics": rate_systematics}) + +dl.derive("dl_postfit_test", cls_dict={ + "ml_model_name": None, + "config_categories": [ + "sr__1b", + "sr__2b", + ], + "processes": [ + # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4", + "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3", + "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2", + "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96", + "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39", + "hh_ggf_hbb_hww2l2nu_kl0_kt1", + "hh_ggf_hbb_hww2l2nu_kl1_kt1", + "hh_ggf_hbb_hww2l2nu_kl2p45_kt1", + "hh_ggf_hbb_hww2l2nu_kl5_kt1", + "st", + "tt", + "dy", + "w_lnu", + "vv", + "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth", + ], + "systematics": rate_systematics, +}) diff --git a/hbw/ml/base.py b/hbw/ml/base.py index dfb134a3..7644f7a2 100644 --- a/hbw/ml/base.py +++ b/hbw/ml/base.py @@ -336,6 +336,7 @@ def open_model(self, target: law.LocalDirectoryTarget) -> dict[str, Any]: # custom loss needed due to output layer changes for negative weights from hbw.ml.tf_util import cumulated_crossentropy + models["model"] = tf.keras.models.load_model( target["mlmodel"].path, custom_objects={cumulated_crossentropy.__name__: cumulated_crossentropy}, ) @@ -373,7 +374,6 @@ def load_data( # load into memory validation.load_all log_memory("loading validation data") - # store input features as an output output["mlmodel"].child("input_features.pkl", type="f").dump(self.input_features_ordered, formatter="pickle") @@ -414,7 +414,6 @@ def train( # # training # - self.fit_ml_model(task, model, train, validation, output) log_memory("training") # save the model and history; TODO: use formatter @@ -454,7 +453,7 @@ def evaluate( """ Evaluation function that is run as part of the MLEvaluation task """ - use_best_model = False + use_best_model = False # TODO ML, hier auf True setzen? if len(events) == 0: logger.warning(f"Dataset {task.dataset} is empty. 
diff --git a/hbw/ml/data_loader.py b/hbw/ml/data_loader.py
index 9b6a1457..87826ee9 100644
--- a/hbw/ml/data_loader.py
+++ b/hbw/ml/data_loader.py
@@ -97,7 +97,14 @@ class MLDatasetLoader:
     input_arrays: tuple = ("features", "weights", "train_weights", "equal_weights")
     evaluation_arrays: tuple = ("prediction",)
 
-    def __init__(self, ml_model_inst: MLModel, process: "str", events: ak.Array, stats: dict | None = None):
+    def __init__(
+        self,
+        ml_model_inst: MLModel,
+        process: "str",
+        events: ak.Array,
+        stats: dict | None = None,
+        skip_mask: bool = False,
+    ):
         """
         Initializes the MLDatasetLoader with the given parameters.
@@ -112,10 +119,15 @@ def __init__(self, ml_model_inst: MLModel, process: "str", events: ak.Array, sta
         """
         self._ml_model_inst = ml_model_inst
         self._process = process
+        self._skip_mask = skip_mask
 
         proc_mask, _ = get_proc_mask(events, process, ml_model_inst.config_inst)
         self._stats = stats
-        self._events = events[proc_mask]
+        if not skip_mask:
+            # apply the process mask first, then drop events with negative event weight
+            self._events = events[proc_mask]
+            self._events = self._events[self._events.event_weight >= 0.0]
+        else:
+            self._events = events
 
     def __repr__(self):
         return f"{self.__class__.__name__}({self.ml_model_inst.cls_name}, {self.process})"
@@ -143,6 +155,10 @@ def parameters(self):
         }
         return self._parameters
 
+    @property
+    def skip_mask(self):
+        return self._skip_mask
+
     @property
     def ml_model_inst(self):
         return self._ml_model_inst
@@ -221,6 +237,13 @@ def shuffle_indices(self) -> np.ndarray:
             self._shuffle_indices = np.random.permutation(self.n_events)
         return self._shuffle_indices
 
+    @property
+    def num_event_per_process(self) -> str:
+        if not self.skip_mask:
+            return "num_events_pos_weights_per_process"
+        else:
+            return "num_events_per_process"
+
     def get_xsec_train_weights(self) -> np.ndarray:
         """
         Weighting such that each event has roughly the same weight,
@@ -233,10 +256,20 @@ def get_xsec_train_weights(self) -> np.ndarray:
             raise Exception("cannot determine train weights without stats")
 
         _, sub_id = get_proc_mask(self._events, self.process, self.ml_model_inst.config_inst)
-        sum_abs_weights = np.sum([self.stats[self.process]["sum_abs_weights_per_process"][str(id)] for id in sub_id])
-        num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id])
+        sum_weights = np.sum([self.stats[self.process]["sum_pos_weights_per_process"][str(id)] for id in sub_id])
+        num_events = np.sum(
+            [self.stats[self.process][self.num_event_per_process][str(id)] for id in sub_id],
+        )
 
-        xsec_train_weights = self.weights / sum_abs_weights * num_events
+        xsec_train_weights = self.weights / sum_weights * num_events
 
         return xsec_train_weights
@@ -252,7 +285,15 @@ def get_equal_train_weights(self) -> np.ndarray:
         combined_proc_inst = self.ml_model_inst.config_inst.get_process(self.process)
         _, sub_id_proc = get_proc_mask(self._events, self.process, self.ml_model_inst.config_inst)
-        num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id_proc])
+        num_events = np.sum(
+            [self.stats[self.process][self.num_event_per_process][str(id)] for id in sub_id_proc],
+        )
         targeted_sum_of_weights_per_process = (
             num_events / len(combined_proc_inst.x.ml_config.sub_processes)
         )
@@ -685,8 +726,10 @@ def prediction(self) -> np.ndarray:
             self._prediction = self.load_data("prediction")
         else:
             # calculate prediction if needed
-            if not hasattr(self._ml_model_inst, "trained_model"):
+            if not hasattr(self._ml_model_inst, "best_model"):
+                # if not hasattr(self._ml_model_inst, "trained_model"):
                 raise Exception("No trained model found in the MLModel instance. 
Cannot calculate prediction.") - self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features) + # self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features) + self._prediction = predict_numpy_on_batch(self._ml_model_inst.best_model, self.features) - return self._prediction + return self._prediction # TODO ML best model diff --git a/hbw/ml/derived/dl.py b/hbw/ml/derived/dl.py index 05791fe7..1a4563a6 100644 --- a/hbw/ml/derived/dl.py +++ b/hbw/ml/derived/dl.py @@ -80,7 +80,7 @@ class DenseClassifierDL(DenseModelMixin, ModelFitMixin, MLClassifierBase): store_name: str = "inputs_inclusive" folds: int = 5 - negative_weights: str = "handle" + negative_weights: str = "ignore" # overwriting DenseModelMixin parameters activation: str = "relu" @@ -242,8 +242,83 @@ def setup(self) -> None: "processes": ["signal", "top"], }) +dl_22post_ml_study_5 = dl_22post.derive("dl_22post_ml_study_5", cls_dict={ + "training_configs": lambda self, requested_configs: ["c22post"], + "combine_processes": { + "signal_ggf5": { + # "name": "tt_and_st", + "label": "Signal GGF", + "sub_processes": [ + "hh_ggf_hbb_hvv2l2nu_kl0_kt1", + "hh_ggf_hbb_hvv2l2nu_kl1_kt1", + "hh_ggf_hbb_hvv2l2nu_kl5_kt1", + ], + "weighting": "equal", + }, + "signal_vbf5": { + # "name": "tt_and_st", + "label": "Signal VBF", + "sub_processes": [ + "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hvv2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hvv2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hvv2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hvv2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39", + ], + "weighting": "equal", + }, + }, + "processes": [ + "signal_ggf5", + "signal_vbf5", + "tt", + "st", + "dy", + "h", + ], +}) + +dl_22post_ml_study_4 = dl_22post.derive("dl_22post_ml_study_4", cls_dict={ + "training_configs": lambda self, requested_configs: ["c22post"], + "combine_processes": { + "signal_ggf4": { + # "name": "tt_and_st", + "label": "Signal GGF", + "sub_processes": [ + "hh_ggf_hbb_hvv2l2nu_kl0_kt1", + "hh_ggf_hbb_hvv2l2nu_kl1_kt1", + "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1", + ], + "weighting": "xsec", + }, + "signal_vbf4": { + # "name": "tt_and_st", + "label": "Signal VBF", + "sub_processes": [ + "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", + "hh_vbf_hbb_hvv2l2nu_kv1_k2v0_kl1", + "hh_vbf_hbb_hvv2l2nu_kvm0p962_k2v0p959_klm1p43", + "hh_vbf_hbb_hvv2l2nu_kvm1p21_k2v1p94_klm0p94", + "hh_vbf_hbb_hvv2l2nu_kvm1p6_k2v2p72_klm1p36", + "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39", + ], + "weighting": "xsec", + }, + }, + "processes": [ + "signal_ggf4", + "signal_vbf4", + "tt", + "st", + "dy", + "h", + ], +}) + dl_22post_ml_study_1 = dl_22post.derive("dl_22post_ml_study_1", cls_dict={ "training_configs": lambda self, requested_configs: ["c22post"], + "negative_weights": "ignore", "combine_processes": { "signal_ggf": { # "name": "tt_and_st", @@ -280,8 +355,13 @@ def setup(self) -> None: ], }) +dl_22post_ml_study_1_handle = dl_22post_ml_study_1.derive("dl_22post_ml_study_1_handle", cls_dict={ + "negative_weights": "handle", +}) + dl_22post_ml_study_2 = dl_22post.derive("dl_22post_ml_study_2", cls_dict={ "training_configs": lambda self, requested_configs: ["c22post"], + "negative_weights": "ignore", "combine_processes": { "signal_ggf2": { # "name": "tt_and_st", @@ -306,7 +386,7 @@ def setup(self) -> None: "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39", ], - "weighting": "xsec", + "weighting": "equal", }, }, "processes": [ @@ -318,6 +398,11 @@ def setup(self) -> None: "h", ], }) + 
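Note: the derive calls used throughout this file build subclasses whose class attributes are overridden by cls_dict, so a "_handle" variant differs from its parent only in the negative-weights strategy. A minimal sketch of the mechanism — illustrative only; the real derive comes from the order/columnflow base classes and performs additional bookkeeping:

    def derive_sketch(base_cls: type, name: str, cls_dict: dict) -> type:
        # build a subclass that overrides only the given class attributes
        return type(name, (base_cls,), cls_dict)

    # usage, mirroring the derive below (class names are placeholders):
    # Study2Handle = derive_sketch(Study2, "dl_22post_ml_study_2_handle",
    #                              {"negative_weights": "handle"})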
+dl_22post_ml_study_2_handle = dl_22post_ml_study_2.derive("dl_22post_ml_study_2_handle", cls_dict={ + "negative_weights": "handle", +}) + # # setups with different processes (0: baseline, 1: add SM vbf + single H, 2: add SL+all HH variations) # NOTE: we should decide which signal processes exactly to use: @@ -332,16 +417,23 @@ def setup(self) -> None: "training_configs": lambda self, requested_configs: ["c22post", "c22pre"], "processes": ["hh_ggf_hbb_hvv2l2nu_kl1_kt1", "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", "tt", "st", "dy", "h"], }) -dl_22_procs1_w0 = dl_22_procs1.derive("dl_22_procs1_w1", cls_dict={ +dl_22_procs1_w0 = dl_22_procs1.derive("dl_22_procs1_w0", cls_dict={ + "training_configs": lambda self, requested_configs: ["c22post"], + "negative_weights": "ignore", "ml_process_weights": { "hh_ggf_hbb_hvv2l2nu_kl1_kt1": 1, "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1": 1, "tt": 2, "st": 2, "dy": 2, - "h": 2, + "h": 1, }, }) + +dl_22_procs1_w0_handle = dl_22_procs1_w0.derive("dl_22_procs1_w0_handle", cls_dict={ + "negative_weights": "handle", +}) + dl_22_procs1_w1 = dl_22_procs1.derive("dl_22_procs1_w1", cls_dict={ "ml_process_weights": { "hh_ggf_hbb_hvv2l2nu_kl1_kt1": 1, diff --git a/hbw/ml/mixins.py b/hbw/ml/mixins.py index 38c632e2..3a423119 100644 --- a/hbw/ml/mixins.py +++ b/hbw/ml/mixins.py @@ -97,16 +97,13 @@ def prepare_ml_model( epsilon=1e-6, amsgrad=False, ) - model.compile( - loss=cumulated_crossentropy, - # NOTE: we'd preferrably use the Keras CCE, but it does not work when assigning one event - # to multiple classes (target with multiple entries != 0) - # loss ="categorical_crossentropy", - # loss=tf.keras.losses.CategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE), - optimizer=optimizer, - metrics=["categorical_accuracy"], - weighted_metrics=["categorical_accuracy"], - ) + model_compile_kwargs = { + "loss": "categorical_crossentropy" if self.negative_weights == "ignore" else cumulated_crossentropy, + "optimizer": optimizer, + "metrics": ["categorical_accuracy"], + "weighted_metrics": ["categorical_accuracy"], + } + model.compile(**model_compile_kwargs) return model diff --git a/hbw/ml/stats.py b/hbw/ml/stats.py index 1a4b0a88..9a68c8e6 100644 --- a/hbw/ml/stats.py +++ b/hbw/ml/stats.py @@ -76,15 +76,16 @@ def ml_preparation( events = set_ak_column_f32(events, "event_weight", weight) stats["sum_weights"] += float(ak.sum(weight, axis=0)) weight_map["sum_weights"] = weight - weight_map["sum_abs_weights"] = (weight, weight > 0) - weight_map["sum_pos_weights"] = np.abs(weight) + weight_map["sum_pos_weights"] = (weight, weight > 0) + weight_map["sum_abs_weights"] = np.abs(weight) + weight_map["num_events_pos_weights"] = weight > 0 # normalization weight only norm_weight = events["stitched_normalization_weight"] stats["sum_norm_weights"] += float(ak.sum(norm_weight, axis=0)) weight_map["sum_norm_weights"] = norm_weight - weight_map["sum_abs_norm_weights"] = (norm_weight, norm_weight > 0) - weight_map["sum_pos_norm_weights"] = np.abs(norm_weight) + weight_map["sum_pos_norm_weights"] = (norm_weight, norm_weight > 0) + weight_map["sum_abs_norm_weights"] = np.abs(norm_weight) group_map = { "process": { diff --git a/hbw/tasks/inference.py b/hbw/tasks/inference.py index 98b76320..f291765d 100644 --- a/hbw/tasks/inference.py +++ b/hbw/tasks/inference.py @@ -8,11 +8,10 @@ from typing import Callable -# import luigi import law import order as od -from columnflow.tasks.framework.base import Requirements, RESOLVE_DEFAULT, AnalysisTask +from columnflow.tasks.framework.base import Requirements, 
RESOLVE_DEFAULT, AnalysisTask, ConfigTask
 from columnflow.tasks.framework.parameters import SettingsParameter
 from columnflow.tasks.framework.mixins import (
     CalibratorsMixin, SelectorStepsMixin, ProducersMixin, MLModelsMixin, InferenceModelMixin,
@@ -22,14 +21,72 @@
 from columnflow.util import dev_sandbox, maybe_import
 from hbw.tasks.base import HBWTask
 
-# import hbw.inference.constants as const
-
 array = maybe_import("array")
+uproot = maybe_import("uproot")
+np = maybe_import("numpy")
+hist = maybe_import("hist")
 
 logger = law.logger.get_logger(__name__)
 
 
+# Function copied from Mathis' hist hook commit
+# TODO: define once at a central place (hist_util.py)
+def apply_rebinning_edges(h: hist.Histogram, axis_name: str, edges: list):
+    """
+    Generalized rebinning of a single axis from a hist.Histogram, using predefined edges.
+
+    :param h: histogram to rebin
+    :param axis_name: string representing the axis to rebin
+    :param edges: list of floats representing the new bin edges. Must be a subset of the original edges.
+    :return: rebinned histogram
+    """
+    if isinstance(edges, int):
+        return h[{axis_name: hist.rebin(edges)}]
+
+    ax = h.axes[axis_name]
+    ax_idx = [a.name for a in h.axes].index(axis_name)
+    if not all(np.isclose(x, ax.edges).any() for x in edges):
+        raise ValueError(
+            f"Cannot rebin histogram due to incompatible edges for axis '{ax.name}'\n"
+            f"Edges of histogram are {ax.edges}, requested rebinning to {edges}",
+        )
+
+    # if rebinning to a subset of the initial range, keep the overflow and underflow
+    overflow = ax.traits.overflow or (edges[-1] < ax.edges[-1] and not np.isclose(edges[-1], ax.edges[-1]))
+    underflow = ax.traits.underflow or (edges[0] > ax.edges[0] and not np.isclose(edges[0], ax.edges[0]))
+    flow = overflow or underflow
+    new_ax = hist.axis.Variable(edges, name=ax.name, overflow=overflow, underflow=underflow)
+    axes = list(h.axes)
+    axes[ax_idx] = new_ax
+
+    hnew = hist.Hist(*axes, name=h.name, storage=h._storage_type())
+
+    # offset from the bin edge to avoid numeric issues
+    offset = 0.5 * np.min(ax.edges[1:] - ax.edges[:-1])
+    edges_eval = np.asarray(edges) + offset  # asarray: a plain list would raise on "+ offset"
+    edge_idx = ax.index(edges_eval)
+    # avoid going outside the range, reduceat will add the last index anyway
+    if edge_idx[-1] == ax.size + ax.traits.overflow:
+        edge_idx = edge_idx[:-1]
+
+    if underflow:
+        # only if the original axis had an underflow should you offset
+        if ax.traits.underflow:
+            edge_idx += 1
+        edge_idx = np.insert(edge_idx, 0, 0)
+
+    # take is used because reduceat sums i:len(array) for the last entry; in the case
+    # where the final bin isn't the same between the initial and rebinned histogram,
+    # this value must be dropped
+    hnew.values(flow=flow)[...] = np.add.reduceat(
+        h.values(flow=flow), edge_idx, axis=ax_idx,
+    ).take(indices=range(new_ax.size + underflow + overflow), axis=ax_idx)
+    if hnew._storage_type() == hist.storage.Weight():
+        hnew.variances(flow=flow)[...] = np.add.reduceat(
+            h.variances(flow=flow), edge_idx, axis=ax_idx,
+        ).take(indices=range(new_ax.size + underflow + overflow), axis=ax_idx)
+    return hnew
+
+
 def get_hist_name(cat_name: str, proc_name: str, syst_name: str | None = None) -> str:
     hist_name = f"{cat_name}/{proc_name}"
     if syst_name:
@@ -68,41 +125,34 @@ def get_rebin_values(hist, N_bins_final: int = 10):
     Function that determines how to rebin a hist to *N_bins_final* bins such that
     the resulting histogram is flat
     """
-    N_bins_input = hist.GetNbinsX()
-
-    # # replace empty bin values (TODO: remove as soon as we can disable the empty bin filling)
-    # EMPTY_BIN_VALUE = 1e-5
-    # for i in range(1, N_bins_input + 1):
-    #     if hist.GetBinContent(i) == EMPTY_BIN_VALUE:
-    #         hist.SetBinContent(i, 0)
+    N_bins_input = hist.axes[0].size
 
     # determine events per bin the final histogram should have
-    events_per_bin = hist.Integral() / N_bins_final
+    events_per_bin = hist.sum().value / N_bins_final
     logger.info(f"============ {round(events_per_bin, 3)} events per bin")
 
     # bookkeeping number of bins and number of events
    bin_count = 1
     N_events = 0
-    rebin_values = [hist.GetBinLowEdge(1)]
+    rebin_values = [float(hist.axes[0].edges[0])]
 
-    # starting loop at 1 to exclude underflow
-    # ending at N_bins_input + 1 excludes overflow
-    for i in range(1, N_bins_input + 1):
+    # hist.view() and hist.axes[0].edges contain no flow bins,
+    # so all input bins are iterated with 0-based indices
+    for i in range(N_bins_input):
         if bin_count == N_bins_final:
             # break as soon as N-1 bin edges have been determined --> final bin is x_max
             break
-
-        N_events += hist.GetBinContent(i)
+        N_events += hist.view()["value"][i]
         if i % 100 == 0:
             logger.info(f"========== Bin {i} of {N_bins_input}, {N_events} events")
         if N_events >= events_per_bin * bin_count:
             # when *N_events* surpasses threshold, append the upper edge of the current bin and count
-            logger.info(f"++++++++++ Append bin edge {bin_count} of {N_bins_final} at edge {hist.GetBinLowEdge(i)}")
-            rebin_values.append(hist.GetBinLowEdge(i + 1))
+            logger.info(f"++++++++++ Append bin edge {bin_count} of {N_bins_final} at edge {hist.axes[0].edges[i]}")
+            rebin_values.append(hist.axes[0].edges[i + 1])
             bin_count += 1
 
     # final bin is x_max
-    x_max = hist.GetBinLowEdge(N_bins_input + 1)
+    x_max = hist.axes[0].edges[N_bins_input]
     rebin_values.append(x_max)
     logger.info(f"final bin edges: {rebin_values}")
     return rebin_values
@@ -111,7 +161,7 @@ def get_rebin_values(hist, N_bins_final: int = 10):
 def apply_binning(hist, rebin_values: list):
     N_bins = len(rebin_values) - 1
     rebin_values_ptr = array.array("d", rebin_values)
-
+    # rebin_values_ptr = np.array(rebin_values, dtype="float64")
     h_out = hist.Rebin(N_bins, hist.GetName(), rebin_values_ptr)
     return h_out
@@ -202,10 +252,12 @@ class ModifyDatacardsFlatRebin(
     ProducersMixin,
     SelectorStepsMixin,
     CalibratorsMixin,
+    ConfigTask,
     law.LocalWorkflow,
     RemoteWorkflow,
 ):
-    sandbox = dev_sandbox("bash::$CF_BASE/sandboxes/cmssw_default.sh")
+
+    sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox"))
 
     bins_per_category = SettingsParameter(
         default=(RESOLVE_DEFAULT,),
@@ -268,7 +320,6 @@ def get_rebin_processes(self):
         """
         config_category = self.branch_data.config_category
         processes = self.branch_data.processes.copy()
-        # inf_proc_names = [self.inference_model_inst.inf_proc(proc.name) for proc in self.branch_data.processes]
 
         rebin_process_condition = self.inference_category_rebin_processes.get(config_category, None)
         if not rebin_process_condition:
@@ -321,11 +372,6 @@ def output(self):
         }
 
     def run(self):
-        import uproot
-        from ROOT import TH1
-
-        # config_inst = self.config_inst
-        # inference_model = self.inference_model_inst
 
         inputs = self.input()
         outputs = self.output()
@@ -339,7 +385,6 @@ def run(self):
         outputs["card"].dump(datacard, formatter="text")
 
         with uproot.open(inp_shapes.fn) as file:
-            # logger.info(f"File keys: {file.keys()}")
             # determine which histograms are present
             cat_names, proc_names, syst_names = get_cat_proc_syst_names(file)
@@ -355,7 +400,8 @@ def run(self):
 
             # get all nominal histograms
             nominal_hists = {
-                proc_name: file[get_hist_name(cat_name, proc_name)].to_pyroot()
+                proc_name: file[get_hist_name(cat_name, proc_name)].to_hist()
+                # proc_name: file[get_hist_name(cat_name, proc_name)].to_pyroot()
                 for proc_name in proc_names
             }
@@ -379,22 +425,23 @@ def run(self):
 
             # apply rebinning on all histograms and store resulting hists in a ROOT file
             out_file = uproot.recreate(outputs["shapes"].fn)
+            rebinned_histograms = {}
             for key, h in file.items():
-                # remove ";1" appendix
+
                 key = key.split(";")[0]
-                try:
-                    # convert histograms to pyroot
-                    h = h.to_pyroot()
-                except AttributeError:
-                    # skip non-histograms
-                    continue
-                if not isinstance(h, TH1):
-                    raise Exception(f"{h} is not a TH1 histogram")
+                if "TH1" in str(h):
+                    try:
+                        h = file[key].to_hist()
+                    except AttributeError:
+                        continue
+                    h_rebin = apply_rebinning_edges(h, h.axes[0].name, rebin_values)
+                    # problematic_bin_count = check_empty_bins(h_rebin)
+                    # TODO: reimplement check_empty_bins, checking for all backgrounds rather than per process
+                    rebinned_histograms[key] = h_rebin
 
-                h_rebin = apply_binning(h, rebin_values)
-                problematic_bin_count = check_empty_bins(h_rebin)  # noqa
-                logger.info(f"Inserting histogram with name {key}")
-                out_file[key] = uproot.from_pyroot(h_rebin)
+                    logger.info(f"Inserting histogram with name {key}")
+                    out_file[key] = h_rebin
 
 
 class PrepareInferenceTaskCalls(
@@ -404,6 +451,7 @@ class PrepareInferenceTaskCalls(
     ProducersMixin,
     SelectorStepsMixin,
     CalibratorsMixin,
+    ConfigTask,
 ):
     """
     Simple task that produces strings to run certain tasks in Inference
@@ -457,6 +505,9 @@ def run(self):
         # combine category names with card fn to a single string
         datacards = ",".join([f"{cat_name}=$CARDS_PATH/{card_fn}" for cat_name, card_fn in zip(cat_names, card_fns)])
 
+        # # name of the output root file that contains the Pre+Postfit shapes
+        # output_file = ""
+
         base_cmd = f"export CARDS_PATH={cards_path}" + "\n"
         print("\n\n")
@@ -490,4 +541,11 @@ def run(self):
             f"--order-by-impact"
         )
         print(cmd, "\n\n")
+
+        # run PreAndPostFitShapes for pre- and postfit plots
+        cmd = base_cmd + (
+            f"law run PreAndPostFitShapes --version {identifier} --datacards {datacards} "
+            # f"--output-name {output_file}"
+        )
+        print(cmd, "\n\n")
 
         output["FitDiagnostics"].dump(cmd, formatter="text")
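Note: a short usage sketch for apply_rebinning_edges as defined above, on a synthetic weighted histogram; the new edges must line up with existing ones, otherwise the function raises:

    import hist
    import numpy as np

    h = hist.Hist(
        hist.axis.Regular(10, 0.0, 1.0, name="score"),
        storage=hist.storage.Weight(),
    )
    h.fill(score=np.random.default_rng(0).uniform(size=1_000))
    # merge the ten uniform bins into three bins with the given edges
    h_rebinned = apply_rebinning_edges(h, "score", [0.0, 0.3, 0.7, 1.0])
    print(h_rebinned.axes["score"].edges)  # [0.  0.3 0.7 1. ]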
diff --git a/hbw/tasks/ml.py b/hbw/tasks/ml.py
index ac2eb876..44b8ffa2 100644
--- a/hbw/tasks/ml.py
+++ b/hbw/tasks/ml.py
@@ -330,6 +330,8 @@ def output(self):
         # the stats dict is created per process+fold, but should always be identical, therefore we store it only once
         outputs["stats"] = self.target("stats.json")
 
+        outputs["cross_check_weights"] = self.target(f"cross_check_weights_{process}_{self.fold}.yaml")
+
         outputs["parameters"] = self.target("parameters.yaml")
 
         return outputs
@@ -398,23 +400,36 @@ def run(self):
         outputs["stats"].dump(stats, formatter="json")
 
         # initialize the DatasetLoader
-        ml_dataset = self.ml_model_inst.data_loader(self.ml_model_inst, process, events, stats)
-        # NOTE: Almost closure
+        if self.ml_model_inst.negative_weights == "ignore":
+            ml_dataset = self.ml_model_inst.data_loader(self.ml_model_inst, process, events, stats)
+            logger.info(
+                f"{self.ml_model_inst.negative_weights} method chosen to handle negative weights: "
+                "all negative weights will be removed from training.",
+            )
+        else:
+            ml_dataset = self.ml_model_inst.data_loader(self.ml_model_inst, process, events, stats, skip_mask=True)
 
         sum_train_weights = np.sum(ml_dataset.train_weights)
         n_events_per_fold = len(ml_dataset.train_weights)
-        logger.info(f"Sum of traing weights is: {sum_train_weights} for {n_events_per_fold} {process} events")
-
-        # check that equal weighting works as intended
+        logger.info(f"Sum of training weights is: {sum_train_weights} for {n_events_per_fold} {process} events")
 
+        xcheck = {}
         if self.ml_model_inst.config_inst.get_process(process).x("ml_config", None):
+            xcheck[process] = {}
             if self.ml_model_inst.config_inst.get_process(process).x.ml_config.weighting == "equal":
                 for sub_proc in self.ml_model_inst.config_inst.get_process(process).x.ml_config.sub_processes:
-                    proc_mask, sub_id = get_proc_mask(events, sub_proc, self.ml_model_inst.config_inst)
+                    proc_mask, _ = get_proc_mask(ml_dataset._events, sub_proc, self.ml_model_inst.config_inst)
                     xcheck_weight_sum = np.sum(ml_dataset.train_weights[proc_mask])
                     xcheck_n_events = len(ml_dataset.train_weights[proc_mask])
                     logger.info(
                         f"For the equal weighting method the sum of weights for {sub_proc} is {xcheck_weight_sum} "
                         f"(No. of events: {xcheck_n_events})",
                     )
+                    if sub_proc not in xcheck[process]:
+                        xcheck[process][sub_proc] = {}
+                    xcheck[process][sub_proc]["weight_sum"] = float(xcheck_weight_sum)
+                    xcheck[process][sub_proc]["num_events"] = xcheck_n_events
+
+        outputs["cross_check_weights"].dump(xcheck, formatter="yaml")
 
         for input_array in self.ml_model_inst.data_loader.input_arrays:
             logger.info(f"loading data for input array {input_array}")
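Note: the cross-check dumped above records, per sub-process, the sum of training weights and the event count; with the "equal" weighting the sums should roughly agree across sub-processes. A toy illustration of that invariant (all numbers made up):

    import numpy as np

    train_weights = np.array([0.5, 1.5, 0.8, 1.2])
    masks = {
        "hh_ggf_kl0": np.array([True, True, False, False]),
        "hh_ggf_kl1": np.array([False, False, True, True]),
    }
    for sub_proc, mask in masks.items():
        # both sums come out at ~2.0 if the equal weighting worked
        print(sub_proc, train_weights[mask].sum(), int(mask.sum()))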
diff --git a/hbw/tasks/postfit_plots.py b/hbw/tasks/postfit_plots.py
index 9df3745a..4069b211 100644
--- a/hbw/tasks/postfit_plots.py
+++ b/hbw/tasks/postfit_plots.py
@@ -6,54 +6,101 @@
 
 from __future__ import annotations
 
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 
 import law
 import luigi
 import order as od
-
 from columnflow.tasks.framework.base import ConfigTask
 from columnflow.tasks.framework.mixins import (
     InferenceModelMixin, MLModelsMixin, ProducersMixin, SelectorStepsMixin,
-    CalibratorsMixin,
+    CalibratorsMixin, DatasetsProcessesMixin,
 )
 from columnflow.tasks.framework.plotting import (
-    PlotBase, PlotBase1D,
+    PlotBase, PlotBase1D, ProcessPlotSettingMixin,
     # VariablesPlotSettingMixin,
 )
 from columnflow.tasks.framework.decorators import view_output_plots
 from hbw.tasks.base import HBWTask
 from columnflow.util import dev_sandbox, DotDict, maybe_import
 
+# from hbw.inference.base import inf_proc
+
 uproot = maybe_import("uproot")
+hist = maybe_import("hist")
 
 logger = law.logger.get_logger(__name__)
 
 
-def load_hists_uproot(fit_diagnostics_path):
+def load_hists_uproot(fit_diagnostics_path, fit_type):
     """ Helper to load histograms from a fit_diagnostics file """
+    with uproot.open(fit_diagnostics_path) as tfile:
+        if any("shapes_fit_s" in _k for _k in tfile.keys()):
+            if fit_type == "postfit":
+                fit_type = "fit_s"
+            hists = get_hists_from_fit_diagnostics(tfile)[f"shapes_{fit_type}"]
+        else:
+            hists = get_hists_from_multidimfit(tfile)[f"{fit_type}"]
+
+    return hists
+
+
+def get_hists_from_fit_diagnostics(tfile):
+    """ Helper function to load histograms from a root file created by FitDiagnostics """
+    # prepare output dict
     hists = DotDict()
-    with uproot.open(fit_diagnostics_path) as tfile:
-        keys = [key.split("/") for key in tfile.keys()]
-        for key in keys:
-            if len(key) != 3:
-                continue
-            # get the histogram from the tfile
-            h_in = tfile["/".join(key)]
+    keys = [key.split("/") for key in tfile.keys()]
+    for key in keys:
+        if len(key) != 3:
+            continue
 
-            # unpack key
-            fit, channel, process = key
-            process = process.split(";")[0]
+        # get the histogram from the tfile
+        h_in = tfile["/".join(key)]
 
-            if "data" not in process:
-                # transform TH1F to hist
-                h_in = h_in.to_hist()
+        # unpack key
+        fit, channel, process = key
+        process = process.split(";")[0]
 
-            # set the histogram in a deep dictionary
-            hists = law.util.merge_dicts(hists, DotDict.wrap({fit: {channel: {process: h_in}}}), deep=True)
+        if "data" in process or "total" in process:
+            continue
+        else:
+            # transform TH1F to hist
+            h_in = h_in.to_hist()
+
+        # set the histogram in a deep dictionary
+        hists = law.util.merge_dicts(hists, DotDict.wrap({fit: {channel: {process: h_in}}}), deep=True)
+    return hists
+
+
+def get_hists_from_multidimfit(tfile):
+    """ Helper function to load histograms from a root file created by MultiDimFit """
+    # prepare output dict
+    hists = DotDict()
+    keys = [key.split("/") for key in tfile.keys()]
+    for key in keys:
+        if len(key) != 2:
+            continue
+        # get the histogram from the tfile
+        h_in = tfile["/".join(key)]
+
+        # unpack key
+        fit_and_channel, process = key
+        fit = fit_and_channel.split("_")[-1]
+        channel = fit_and_channel.replace(f"_{fit}", "")
+        process = process.split(";")[0]
+
+        if "data" in process or "Total" in process:
+            continue
+        else:
+            # transform TH1F to hist
+            h_in = h_in.to_hist()
+
+        # set the histogram in a deep dictionary
+        hists = law.util.merge_dicts(hists, DotDict.wrap({fit: {channel: {process: h_in}}}), deep=True)
     return hists
 
@@ -67,11 +114,12 @@ def load_hists_uproot(fit_diagnostics_path):
 
 from columnflow.plotting.plot_util import (
     prepare_plot_config,
     prepare_style_config,
+    apply_process_settings,
 )
 
 
 def plot_postfit_shapes(
-    hists: OrderedDict,
+    hists: OrderedDict[od.Process, hist.Hist],
     config_inst: od.Config,
     category_inst: od.Category,
     variable_insts: list[od.Variable],
@@ -85,7 +133,7 @@ def plot_postfit_shapes(
     **kwargs,
 ) -> tuple(plt.Figure, tuple(plt.Axes)):
     variable_inst = law.util.make_tuple(variable_insts)[0]
-
+    hists = apply_process_settings(hists, process_settings)
     plot_config = prepare_plot_config(
         hists,
         shape_norm=shape_norm,
@@ -111,6 +159,8 @@ class PlotPostfitShapes(
     # NOTE: mixins might be wrong and could (should?) be extended to MultiConfigTask
     HBWTask,
     PlotBase1D,
+    ProcessPlotSettingMixin,
+    DatasetsProcessesMixin,
     # to correctly setup our InferenceModel, we need all these mixins, but hopefully, all these
     # parameters are automatically resolved correctly
     InferenceModelMixin,
@@ -131,9 +181,10 @@ class PlotPostfitShapes(
     """
 
     sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox"))
+    datasets = None
 
     plot_function = PlotBase.plot_function.copy(
-        default="hbw.tasks.plotting.plot_postfit_shapes",
+        default="hbw.tasks.postfit_plots.plot_postfit_shapes",
         add_default_to_description=True,
     )
@@ -147,11 +198,60 @@ class PlotPostfitShapes(
         description="Whether to do prefit or postfit plots; defaults to False",
     )
 
+    @property
+    def fit_type(self) -> str:
+        if self.prefit:
+            return "prefit"
+        else:
+            return "postfit"
+
     def requires(self):
         return {}
 
     def output(self):
-        return {"plots": self.target("plots", dir=True)}
+        return {"plots": self.target(f"plots_{self.fit_type}", dir=True)}
+
+    # map processes in the root shapes to the process instances used for plotting
+    def prepare_hist_map(self, hist_processes: set, process_insts: list) -> defaultdict:
+
+        hist_map = defaultdict(list)
+
+        for proc_key in hist_processes:
+            proc_inst = None
+            # try getting the config process via InferenceModel
+            # NOTE: only the first category is taken to get the config process instance,
+            # assuming they are the same for all categories
+            channel = self.inference_model_inst.get_categories_with_process(proc_key)[0]
+            has_category = self.inference_model_inst.has_category(channel)
+            if has_category:
+                inference_process = self.inference_model_inst.get_process(proc_key, channel)
+                proc_inst = self.config_inst.get_process(inference_process.config_process)
+            else:
+                # try getting the process inst directly via config
+                proc_inst = self.config_inst.get_process(proc_key, default=None)
+
+            # replace string keys with process instances,
+            # mapping HH inference processes to process instances for plotting
+            if proc_inst:
+                plot_proc = [
+                    proc for proc in process_insts if proc.has_process(proc_inst) or proc.name == proc_inst.name
+                ]
+                if len(plot_proc) > 1:
+                    logger.warning(
+                        f"{proc_key} was assigned to ({','.join([p.name for p in plot_proc])}),"
+                        f" but {plot_proc[0].name} was chosen",
+                    )
+                elif len(plot_proc) == 0:
+                    logger.warning(f"{proc_key} in root file, but won't be plotted.")
+                    continue
+                plot_proc = plot_proc[0]
+
+                hist_map[plot_proc].append(proc_key)
+
+        return hist_map
 
     @view_output_plots
     def run(self):
         logger.warning(
            f"Note! 
It is important that the requested inference_model {self.inference_model} "
             "is identical to the one that has been used to create the datacards",
         )
-        all_hists = load_hists_uproot(self.fit_diagnostics_file)
 
         outp = self.output()
 
-        if self.prefit:
-            fit_type = "prefit"
-        else:
-            fit_type = "fit_s"
 
-        all_hists = all_hists[f"shapes_{fit_type}"]
+        # load all required histograms corresponding to the fit_type from the root input file
+        all_hists = load_hists_uproot(self.fit_diagnostics_file, self.fit_type)
+
+        # get the list of all process instances required for plotting
+        process_insts = list(map(self.config_inst.get_process, self.processes))
+
+        # map processes in the root shapes to the corresponding process instances used for plotting
+        hist_processes = {key for _, h_in in all_hists.items() for key in h_in.keys()}
+        hist_map = self.prepare_hist_map(hist_processes, process_insts)
 
-        for channel, hists in all_hists.items():
+        # plot pre/postfit distributions for each channel
+        for channel, h_in in all_hists.items():
+
+            # check for coherence between inference and pre/postfit categories
             has_category = self.inference_model_inst.has_category(channel)
             if not has_category:
                 logger.warning(f"Category {channel} is not part of the inference model {self.inference_model}")
+                continue
 
-            for proc_key in list(hists.keys()):
-                # remove unnecessary histograms
-                if "data" in proc_key or "total" in proc_key:
-                    hists.pop(proc_key)
+            # create the histograms
+            hists = OrderedDict()
+            for proc in hist_map:
+                plot_proc = proc.copy()  # NOTE: copy, so the actual process inst is not modified by process settings
+                if hist_map[proc][0] not in h_in:
+                    logger.warning(f"No histograms for {proc.name} found in {channel}.")
                     continue
+                hists[plot_proc] = h_in[hist_map[proc][0]]
+                for p in hist_map[proc][1:]:
+                    hists[plot_proc] += h_in[p]
 
-                proc_inst = None
-                # try getting the config process via InferenceModel
-                if has_category:
-                    # TODO: process customization based on inference process? e.g. scale
-                    inference_process = self.inference_model_inst.get_process(proc_key, channel)
-                    proc_inst = self.config_inst.get_process(inference_process.config_process)
-                else:
-                    # try getting proc inst directly via config
-                    proc_inst = self.config_inst.get_process(proc_key, default=None)
-
-                # replace string keys with process instances
-                if proc_inst:
-                    hists[proc_inst] = hists[proc_key]
-                    hists.pop(proc_key)
-
-            # try getting the config category and variable via InferenceModel
             if has_category:
-                # TODO: category/variable customization based on inference model?
                 inference_category = self.inference_model_inst.get_category(channel)
                 config_category = self.config_inst.get_category(inference_category.config_category)
                 variable_inst = self.config_inst.get_variable(inference_category.config_variable)
@@ -207,13 +302,14 @@ def run(self):
                 variable_inst = od.Variable("dummy")
 
             # call the plot function
+            h = hists.copy()  # NOTE: shallow copy, so the per-channel mapping itself is not modified downstream
             fig, _ = self.call_plot_func(
                 self.plot_function,
-                hists=hists,
+                hists=h,
                 config_inst=self.config_inst,
                 category_inst=config_category,
                 variable_insts=variable_inst,
                 **self.get_plot_parameters(),
             )
-            outp["plots"].child(f"{channel}_{fit_type}.pdf", type="f").dump(fig, formatter="mpl")
+            outp["plots"].child(f"{channel}_{self.fit_type}.pdf", type="f").dump(fig, formatter="mpl")
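Note: for reference, these are the two file layouts that load_hists_uproot distinguishes above. FitDiagnostics nests histograms as shapes_{prefit,fit_s}/<channel>/<process>, while the MultiDimFit-based PreAndPostFitShapes output uses <channel>_{prefit,postfit}/<process>. A reading sketch — file names and key names are illustrative only:

    import uproot

    # FitDiagnostics layout
    with uproot.open("fitDiagnostics.root") as f:
        h_tt = f["shapes_fit_s/sr__1b/tt"].to_hist()

    # MultiDimFit / PreAndPostFitShapes layout
    with uproot.open("shapes.root") as f:
        h_tt = f["sr__1b_postfit/tt"].to_hist()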