From 860cdd48d03945a3d374cc07b2c044ce899dbf34 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 4 Dec 2024 09:04:06 +0100 Subject: [PATCH 01/28] cleanup in scripts --- hbw/scripts/hbwtasks.sh | 6 +++--- hbw/scripts/test_config.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hbw/scripts/hbwtasks.sh b/hbw/scripts/hbwtasks.sh index 7ebc65ff..2486a8a1 100644 --- a/hbw/scripts/hbwtasks.sh +++ b/hbw/scripts/hbwtasks.sh @@ -40,7 +40,7 @@ hbw_calibration(){ --cf.CalibrateEvents-no-poll \ --cf.CalibrateEvents-parallel-jobs 4000 \ --cf.CalibrateEvents-retries 1 \ - --cf.CalibrateEvents-tasks-per-job 2 \ + --cf.CalibrateEvents-tasks-per-job 1 \ --cf.CalibrateEvents-job-workers 1 \ --cf.BundleRepo-custom-checksum $(checksum) \ $@ @@ -55,7 +55,7 @@ hbw_reduction(){ --cf.ReduceEvents-no-poll \ --cf.ReduceEvents-parallel-jobs 4000 \ --cf.ReduceEvents-retries 1 \ - --cf.ReduceEvents-tasks-per-job 2 \ + --cf.ReduceEvents-tasks-per-job 1 \ --cf.ReduceEvents-job-workers 1 \ --cf.BundleRepo-custom-checksum $(checksum) \ $@ @@ -71,7 +71,7 @@ hbw_merge_reduction(){ --cf.ReduceEvents-pilot \ --cf.ReduceEvents-parallel-jobs 4000 \ --cf.ReduceEvents-retries 1 \ - --cf.ReduceEvents-tasks-per-job 2 \ + --cf.ReduceEvents-tasks-per-job 1 \ --cf.ReduceEvents-job-workers 1 \ --cf.BundleRepo-custom-checksum $(checksum) \ $@ diff --git a/hbw/scripts/test_config.py b/hbw/scripts/test_config.py index b81fc515..0a40ff1e 100644 --- a/hbw/scripts/test_config.py +++ b/hbw/scripts/test_config.py @@ -9,7 +9,7 @@ default_analysis = law.config.get_expanded("analysis", "default_analysis") default_config = law.config.get_expanded("analysis", "default_config") - +default_config = "c22uhhpost" analysis_inst = ana = AnalysisTask.get_analysis_inst(default_analysis) config_inst = cfg = ana.get_config(default_config) @@ -110,11 +110,11 @@ print("Direction:", shift_inst.direction) print("Aliases:", shift_inst.x.column_aliases) -# get some exemplary aux (all 3 methods get you the same result) -default_selector = cfg.get_aux("default_selector") -default_selector = cfg.aux["default_selector"] -default_selector = cfg.x.default_selector -print("================= default selector:", default_selector, "=======") +# # get some exemplary aux (all 3 methods get you the same result) +# default_selector = cfg.get_aux("default_selector") +# default_selector = cfg.aux["default_selector"] +# default_selector = cfg.x.default_selector +# print("================= default selector:", default_selector, "=======") # set some exemplary aux youself cfg.set_aux("example", "test") From 1d057d5c2433d9a6a105163a0a13b99d5e6b10be Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 4 Dec 2024 09:04:56 +0100 Subject: [PATCH 02/28] use correct btag reweighting and add vjets weight_producer --- hbw/weight/default.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hbw/weight/default.py b/hbw/weight/default.py index 501ef584..2b472e03 100644 --- a/hbw/weight/default.py +++ b/hbw/weight/default.py @@ -96,6 +96,10 @@ def base_init(self: WeightProducer) -> None: # remove dependency towards top pt weights self.weight_columns.pop("top_pt_weight", None) + if not self.dataset_inst.has_tag("is_v_jets"): + # remove dependency towards vjets weights + self.weight_columns.pop("vjets_weight", None) + self.shifts = set() # when jec sources are known btag SF source, then propagate the shift to the WeightProducer @@ -152,7 +156,14 @@ def base_init(self: WeightProducer) -> None: **default_correction_weights, } 
default_weight_producer = base.derive("default", cls_dict={"weight_columns": default_weight_columns}) -base.derive("unstitched", cls_dict={"weight_columns": {**default_correction_weights, "normalization_weight": []}}) +with_vjets_weight = default_weight_producer.derive("with_vjets_weight", cls_dict={"weight_columns": { + **default_correction_weights, + "vjets_weight": [], # TODO: corrections/shift missing + "stitched_normalization_weight": [], +}}) +base.derive("unstitched", cls_dict={"weight_columns": { + **default_correction_weights, "normalization_weight": [], +}}) weight_columns_execpt_btag = default_weight_columns.copy() weight_columns_execpt_btag.pop("normalized_ht_njet_nhf_btag_weight") From a274649d9a77ca278df6c5909fd678fad6e72a44 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 5 Dec 2024 12:20:21 +0100 Subject: [PATCH 03/28] switch vjets pt reweighting json and apply NLO EW weights --- hbw/config/config_run2.py | 32 ++++++++++++++++++++++++-------- hbw/production/gen_v.py | 8 +++++--- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index cce3738e..854705b5 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -357,14 +357,28 @@ def if_era( # V+jets reweighting cfg.x.vjets_reweighting = DotDict.wrap({ - "w": { - "value": "wjets_kfactor_value", - "error": "wjets_kfactor_error", - }, "z": { - "value": "zjets_kfactor_value", - "error": "zjets_kfactor_error", + "value": "eej_pTV_kappa_NLO_EW", + "ew": "eej_pTV_kappa_NLO_EW", + "error": "eej_pTV_d1kappa_EW", # NOTE: not sure if this is correct to use as error (d2,d3?) + "d2": "eej_pTV_d2kappa_EW", + "d3": "eej_pTV_d3kappa_EW", + }, + "w": { + "value": "aj_pTV_kappa_NLO_EW", + "ew": "aj_pTV_kappa_NLO_EW", + "error": "aj_pTV_d1kappa_EW", # NOTE: not sure if this is correct to use as error (d2,d3?) 
+ "d2": "aj_pTV_d2kappa_EW", + "d3": "aj_pTV_d3kappa_EW", }, + # "w": { + # "value": "wjets_kfactor_value", + # "error": "wjets_kfactor_error", + # }, + # "z": { + # "value": "zjets_kfactor_value", + # "error": "zjets_kfactor_error", + # }, }) ################################################################################################ @@ -587,8 +601,10 @@ def add_external(name, value): add_external("muon_sf", (f"{json_mirror}/POG/MUO/{corr_tag}/muon_Z.json.gz", "v1")) # btag scale factor add_external("btag_sf_corr", (f"{json_mirror}/POG/BTV/{corr_tag}/btagging.json.gz", "v1")) - # V+jets reweighting (still unused and not centrally produced) - add_external("vjets_reweighting", f"{json_mirror}/data/json/vjets_reweighting.json.gz") + # V+jets reweighting (derived for 13 TeV, custom json converted from ROOT, not centrally produced) + # ROOT files (eej.root and aj.root) taken from here: + # https://github.com/UHH2/2HDM/tree/ultra_legacy/data/ScaleFactors/VJetsCorrections + add_external("vjets_reweighting", (f"{json_mirror}/data/json/vjets_pt.json.gz", "v1")) if cfg.x.run == 2: # met phi corrector (still unused and missing in Run3) add_external("met_phi_corr", (f"{json_mirror}/POG/JME/{corr_tag}/met.json.gz", "v1")) diff --git a/hbw/production/gen_v.py b/hbw/production/gen_v.py index a564359f..fb87f611 100644 --- a/hbw/production/gen_v.py +++ b/hbw/production/gen_v.py @@ -178,9 +178,10 @@ def get_kfactor(obj_name, key, obj): kfactor[key] = get_kfactor(boson, key, events.GenVBoson) weights = { - "nominal": kfactor.value, - "up": kfactor.value + kfactor.error, - "down": kfactor.value - kfactor.error, + # NOTE: 1-kfactor for "ew" correction + "nominal": 1 - kfactor.value, + "up": 1 - kfactor.value + kfactor.error, + "down": 1 - kfactor.value - kfactor.error, } # save the weights @@ -232,6 +233,7 @@ def vjets_weight_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: self.get_vjets_reweighting_file(bundle.files).load(formatter="gzip").decode("utf-8"), ) corrections = self.get_vjets_reweighting_config() + self.vjets_reweighting_evaluators = { obj_name: { key: correction_set[correction_name] From 14f95cb668f82b0fff31983b3969f823f14cbdd4 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 10 Dec 2024 11:24:05 +0100 Subject: [PATCH 04/28] add simple producer for simple normalization weights --- hbw/production/dataset_normalization.py | 79 +++++++++++++++++++++++++ hbw/production/process_ids.py | 3 +- hbw/production/weights.py | 16 ++--- 3 files changed, 90 insertions(+), 8 deletions(-) create mode 100644 hbw/production/dataset_normalization.py diff --git a/hbw/production/dataset_normalization.py b/hbw/production/dataset_normalization.py new file mode 100644 index 00000000..eb92353d --- /dev/null +++ b/hbw/production/dataset_normalization.py @@ -0,0 +1,79 @@ +# coding: utf-8 + +""" +Column production methods related to sample normalization event weights. 
+""" + +from columnflow.production import Producer, producer +from columnflow.util import maybe_import, InsertableDict +from columnflow.columnar_util import set_ak_column + +np = maybe_import("numpy") +sp = maybe_import("scipy") +maybe_import("scipy.sparse") +ak = maybe_import("awkward") + + +@producer( + uses={"mc_weight"}, + produces={"dataset_normalization_weight"}, + # only run on mc + mc_only=True, +) +def dataset_normalization_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + """ + Uses luminosity information of internal py:attr:`config_inst`, the cross section of a process + obtained from the dataset inst and the sum of event weights from the + py:attr:`selection_stats` attribute to assign each event a normalization weight + independent of the sub-processes of the dataset. + Can only be used when there is a one-to-one mapping between datasets and processes. + """ + # get the lumi + lumi = self.config_inst.x.luminosity.nominal + + # compute the weight and store it + norm_weight = events.mc_weight * lumi * self.xs / self.sum_weights + events = set_ak_column(events, "dataset_normalization_weight", norm_weight, value_type=np.float32) + + return events + + +@dataset_normalization_weight.requires +def dataset_normalization_weight_requires(self: Producer, reqs: dict) -> None: + """ + Adds the requirements needed by the underlying py:attr:`task` to access selection stats into + *reqs*. + """ + # TODO: for actual sample stitching, we don't need the selection stats for that dataset, but + # rather the one merged for either all datasets, or the "stitching group" + # (i.e. all datasets that might contain any of the sub processes found in a dataset) + from columnflow.tasks.selection import MergeSelectionStats + reqs["selection_stats"] = MergeSelectionStats.req( + self.task, + tree_index=0, + branch=-1, + _exclude=MergeSelectionStats.exclude_params_forest_merge, + ) + + +@dataset_normalization_weight.setup +def dataset_normalization_weight_setup( + self: Producer, + reqs: dict, + inputs: dict, + reader_targets: InsertableDict, +) -> None: + """ + Load inclusive selection stats and cross sections for the normalization weight calculation. 
+ """ + # load the selection stats + selection_stats = inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json") + + process_inst = self.dataset_inst.processes.get_first() + + xs = process_inst.xsecs.get(self.config_inst.campaign.ecm, None) + if not xs: + raise Exception(f"no cross section found for process {process_inst.name}") + + self.xs = xs.nominal + self.sum_weights = selection_stats["sum_mc_weight"] diff --git a/hbw/production/process_ids.py b/hbw/production/process_ids.py index 2d5315d8..25f78b8b 100644 --- a/hbw/production/process_ids.py +++ b/hbw/production/process_ids.py @@ -62,7 +62,8 @@ def hbw_process_ids_init(self: Producer) -> None: if self.dataset_inst.has_tag("is_hbv"): self.process_producer = hh_bbvv_process_producer - elif "dy" in self.dataset_inst.name: + elif "dy" in self.dataset_inst.name and "amcatnlo" in self.dataset_inst.name: + # stitching of DY NLO samples self.process_producer = dy_nlo_process_producer elif len(self.dataset_inst.processes) == 1: self.process_producer = process_ids diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 5e038c3c..07301324 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -5,6 +5,7 @@ """ import functools +import law from columnflow.util import maybe_import from columnflow.columnar_util import set_ak_column @@ -25,6 +26,7 @@ from hbw.production.gen_v import gen_v_boson, vjets_weight from hbw.production.normalized_weights import normalized_weight_factory from hbw.production.normalized_btag import normalized_btag_weights +from hbw.production.dataset_normalization import dataset_normalization_weight from hbw.util import has_tag @@ -224,11 +226,11 @@ def combined_normalization_weights(self: Producer, events: ak.Array, **kwargs) - when stitching our signal samples, but we want to calculate the BRs ourselved for other types of sample stitching (e.g. DY). 
""" - # NOTE: I would like to produce the unstitched normalization weights for cross checks, - # but for DY, this is not possible at the moment, since we assign processes (hf/lf) for which no - # xsecs are available - # events = self[normalization_weights](events, **kwargs) events = self[self.norm_weights_producer](events, **kwargs) + + # very simple Producer that creates normalization weight without any stitching + # (can only be used when there is a one-to-one mapping between datasets and processes) + events = self[dataset_normalization_weight](events, **kwargs) return events @@ -244,8 +246,8 @@ def combined_normalization_weights_init(self: Producer) -> None: self.norm_weights_producer.weight_name = "stitched_normalization_weight" - self.uses |= {self.norm_weights_producer} - self.produces |= {self.norm_weights_producer} + self.uses |= {self.norm_weights_producer, dataset_normalization_weight} + self.produces |= {self.norm_weights_producer, dataset_normalization_weight} @producer( @@ -262,7 +264,7 @@ def combined_normalization_weights_init(self: Producer) -> None: normalized_pu_weights, }, mc_only=True, - version=1, + version=law.config.get_expanded("analysis", "event_weights_version", 1), ) def event_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ From 8d245141937605c5044ab20dbfe06fc63ab73f76 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 10 Dec 2024 11:29:40 +0100 Subject: [PATCH 05/28] add task for campaign creation and consistency checks --- hbw/analysis/create_analysis.py | 59 +++++------ hbw/config/datasets.py | 9 +- hbw/tasks/campaigns.py | 177 ++++++++++++++++++++++++++++++++ hbw/tasks/inspection.py | 34 +++++- hbw/util.py | 104 +++++++++++++++++-- law.cfg | 2 +- 6 files changed, 346 insertions(+), 39 deletions(-) create mode 100644 hbw/tasks/campaigns.py diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 68b8ca9c..d2a49c1d 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -7,7 +7,6 @@ from __future__ import annotations import os -import importlib import law import order as od @@ -24,6 +23,8 @@ ml_inputs_producer, ) +from hbw.tasks.campaigns import BuildCampaignSummary + @timeit_multiple def create_hbw_analysis( @@ -82,7 +83,6 @@ def create_hbw_analysis( from hbw.config.config_run2 import add_config def add_lazy_config( - campaigns: dict[str, str], config_name: str, config_id: int, **kwargs, @@ -101,22 +101,22 @@ def create_factory( ): @timeit_multiple def analysis_factory(configs: od.UniqueObjectIndex): - hbw_campaign_inst = None - - for mod, campaign in campaigns.items(): - # import the campaign - mod = importlib.import_module(mod) - if not hbw_campaign_inst: - # copy the main campaign - hbw_campaign_inst = getattr(mod, campaign).copy() - else: - # add datasets to the main campaign - campaign_inst = getattr(mod, campaign).copy() - for dataset in list(campaign_inst.datasets): - dataset.x.campaign = campaign - if not hbw_campaign_inst.has_dataset(dataset.name): - hbw_campaign_inst.add_dataset(dataset) - + cpn_task = BuildCampaignSummary( + config=config_name, + ) + if cpn_task.complete(): + logger.warning( + f"Using pickled campaign for config {config_name}; to re-initialize, run:\n" + f"law run {cpn_task.task_family} --config {config_name} --remove-output 0,a,y", + ) + else: + logger.warning( + f"Campaign used for {config_name} has been changed since last initialization." 
+ "Difference: \n", + ) + cpn_task.run() + + hbw_campaign_inst = cpn_task.output()["hbw_campaign_inst"].load(formatter="pickle") return add_config( analysis_inst, hbw_campaign_inst, @@ -139,29 +139,30 @@ def analysis_factory(configs: od.UniqueObjectIndex): # 2017 add_lazy_config( - { - "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", - }, + # { + # "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", + # }, "c17", 1700, ) # 2022 preEE add_lazy_config( - { - "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", - "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", - }, + # { + # "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", + # "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", + # }, "c22pre", 2200, ) # 2022 postEE add_lazy_config( - { - "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", - "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", - }, + # { + # "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", + # "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", + # "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", + # }, "c22post", 2210, ) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index 278a8e80..431a1a32 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -83,6 +83,7 @@ def hbw_dataset_names(config: od.Config, as_list: bool = False) -> DotDict[str: "dy_m50toinf_ht2500toinf_madgraph", ]), *config.x.if_era(run=3, values=[ + # NLO samples "dy_m50toinf_amcatnlo", "dy_m10to50_amcatnlo", "dy_m4to10_amcatnlo", @@ -113,7 +114,13 @@ def hbw_dataset_names(config: od.Config, as_list: bool = False) -> DotDict[str: "zz_pythia", ]), ], - "ttv": [], # empty for now + "ttv": [ + "ttw_wlnu_amcatnlo", + "ttz_zll_m4to50_amcatnlo", + "ttz_zll_m50toinf_amcatnlo", + "ttz_znunu_amcatnlo", + "ttz_zqq_amcatnlo", + ], "h": [ *config.x.if_era(run=3, values=[ # TODO: remove whatever is not really necessary diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py new file mode 100644 index 00000000..c7132cb9 --- /dev/null +++ b/hbw/tasks/campaigns.py @@ -0,0 +1,177 @@ +# coding: utf-8 + +""" +Custom tasks for creating and managing campaigns. +""" + +from collections import defaultdict +from functools import cached_property +import importlib + +import law +import luigi + +from columnflow.tasks.framework.base import AnalysisTask +from hbw.tasks.base import HBWTask + + +logger = law.logger.get_logger(__name__) + + +campaign_map = { + "c17": { + "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", + }, + "c22pre": { + "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", + "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", + }, + "c22post": { + "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", + "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", + "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", + }, +} + + +class BuildCampaignSummary( + HBWTask, + AnalysisTask, +): + + config = luigi.Parameter() + # TODO: set campaigns as part of this function instead of configuring in the config? 
+ + recreate_backup_summary = luigi.BoolParameter(default=False) + + def requires(self): + return {} + + def store_parts(self): + parts = super().store_parts() + + # add the config name + parts.insert_after("task_family", "config", self.config) + + return parts + + @cached_property + def campaigns(self): + if self.config not in campaign_map: + raise ValueError(f"Unknown config {self.config}") + return campaign_map[self.config] + + @cached_property + def campaign_insts(self): + return [ + getattr(importlib.import_module(mod), campaign).copy() + for mod, campaign in self.campaigns.items() + ] + + dataset_from_uhh_identifier = { + # TODO: use DY from uhh campaign + # "dy_m10to50_amcatnlo", + # "dy_m4to10_amcatnlo", + "ttw_", + "ttz_", + } + + def get_dataset_prio(self, dataset_name, campaign): + """ + If dataset should be overwritten from this campaign, return True. + Otherwise, return False. + """ + if "uhh" in campaign.name and any( + dataset_identifier in dataset_name + for dataset_identifier in self.dataset_from_uhh_identifier + ): + return True + + return False + + def output(self): + output = { + "dataset_summary": self.target("dataset_summary.yaml"), + "campaign_summary": self.target("campaign_summary.yaml"), + "hbw_campaign_inst": self.target("hbw_campaign_inst.pickle"), + } + return output + + @cached_property + def dataset_summary(self): + dataset_summary = defaultdict(dict) + used_datasets = set() + # create campaign summary with one key per dataset (to fulfill dataset uniqueness) + for campaign in self.campaign_insts: + for dataset in campaign.datasets: + if dataset.name not in used_datasets or self.get_dataset_prio(dataset.name, campaign): + dataset_summary[dataset.name] = { + "campaign": campaign.name, + "n_events": dataset.n_events, + "n_files": dataset.n_files, + } + used_datasets.add(dataset.name) + + return dict(dataset_summary) + + @cached_property + def campaign_summary(self,): + campaign_summary = { + campaign.name: {} for campaign in self.campaign_insts + } + + for dataset, dataset_info in self.dataset_summary.items(): + campaign_summary[dataset_info["campaign"]][dataset] = { + "n_events": dataset_info["n_events"], + "n_files": dataset_info["n_files"], + } + return campaign_summary + + def get_custom_campaign(self): + hbw_campaign_inst = self.campaign_insts[0].copy() + hbw_campaign_inst.clear_datasets() + for campaign_inst in self.campaign_insts: + campaign_info = self.campaign_summary[campaign_inst.name] + for dataset in campaign_info.keys(): + dataset_inst = campaign_inst.get_dataset(dataset) + dataset_inst.x.campaign = campaign_inst.name + hbw_campaign_inst.add_dataset(dataset_inst) + + return hbw_campaign_inst + + from hbw.util import timeit_multiple + + @timeit_multiple + def run(self): + output = self.output() + + # cross check if the dataset summary did change + backup_dataset_summary = self.target("backup_dataset_summary.yaml") + if backup_dataset_summary.exists(): + backup_dataset_summary = backup_dataset_summary.load(formatter="yaml") + if backup_dataset_summary != self.dataset_summary: + from hbw.util import gather_dict_diff + logger.warning( + "Backup dataset summary does not match the current one \n" + f"{gather_dict_diff(backup_dataset_summary, self.dataset_summary)}", + ) + if self.recreate_backup_summary: + logger.warning("Recreating backup dataset summary") + backup_dataset_summary.dump(self.dataset_summary, formatter="yaml") + else: + logger.warning( + "Run the following command to recreate the backup dataset summary:\n" + f"law run {self.task_family} 
--recreate_backup_summary --config {self.config} --remove-output 0,a,y", # noqa + ) + else: + logger.warning("No backup dataset summary found, creating one now") + backup_dataset_summary.dump(self.dataset_summary, formatter="yaml") + + output["dataset_summary"].dump(self.dataset_summary, formatter="yaml") + output["campaign_summary"].dump(self.campaign_summary, formatter="yaml") + + import sys + orig_rec_limit = sys.getrecursionlimit() + sys.setrecursionlimit(max(orig_rec_limit, 100000)) + output["hbw_campaign_inst"].dump(self.get_custom_campaign(), formatter="pickle") + sys.setrecursionlimit(orig_rec_limit) diff --git a/hbw/tasks/inspection.py b/hbw/tasks/inspection.py index 950c8ee1..be49ac7e 100644 --- a/hbw/tasks/inspection.py +++ b/hbw/tasks/inspection.py @@ -4,15 +4,16 @@ Custom tasks for inspecting the configuration or certain task outputs. """ -# from functools import cached_property +from collections import defaultdict import law import luigi + from columnflow.tasks.framework.mixins import ( ProducersMixin, MLModelsMixin, ) -from columnflow.tasks.framework.base import ConfigTask, Requirements +from columnflow.tasks.framework.base import MultiConfigTask, ConfigTask, Requirements from columnflow.tasks.framework.mixins import DatasetsProcessesMixin, SelectorMixin, CalibratorsMixin from columnflow.tasks.framework.parameters import SettingsParameter from columnflow.tasks.reduction import ReducedEventsUser @@ -287,6 +288,35 @@ def run(self): debugger() +class DatasetSummary( + HBWTask, + MultiConfigTask, +): + def requires(self): + return {} + + def output(self): + output = { + "dataset_summary": self.target("dataset_summary.yaml"), + } + return output + + def run(self): + multi_config_dataset_summary = {} + for config in self.config_insts: + dataset_summary = defaultdict(dict) + cpn_name = config.campaign.name + for dataset in config.datasets: + dataset_campaign = dataset.x("campaign", cpn_name) + dataset_summary[dataset_campaign][dataset.name] = { + "n_events": dataset.n_events, + "n_files": dataset.n_files, + } + multi_config_dataset_summary[config.name] = dict(dataset_summary) + + self.output()["dataset_summary"].dump(multi_config_dataset_summary, formatter="yaml") + + class CheckColumns( ColumnsBaseTask, law.LocalWorkflow, diff --git a/hbw/util.py b/hbw/util.py index 14154431..36e6c4dd 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -230,31 +230,108 @@ def traceback_function(depth: int = 1): def make_dict_hashable(d: dict, deep: bool = True): - """ small helper that converts dict into hashable dict""" + """Small helper that converts dict into a hashable representation.""" d_out = d.copy() for key, value in d.items(): if isinstance(value, Hashable): - # skip values that are already hashable + # Skip values that are already hashable continue elif isinstance(value, dict): - # convert dictionary items to hashable and use items of resulting dict + # Convert nested dictionaries to a hashable form if deep: value = make_dict_hashable(value) d_out[key] = tuple(value) else: - # hopefully, everything else can be cast to a tuple + # Convert other types to tuples d_out[key] = law.util.make_tuple(value) return d_out.items() def dict_diff(dict1: dict, dict2: dict): + """Return the differences between two dictionaries.""" set1 = set(make_dict_hashable(dict1)) set2 = set(make_dict_hashable(dict2)) return set1 ^ set2 +def filter_unchanged_keys(d1: dict, d2: dict): + """Recursively remove unchanged keys from nested dictionaries and return modified values.""" + if not isinstance(d1, dict) or not 
isinstance(d2, dict): + return {"old": d1, "new": d2} if d1 != d2 else None + + filtered = {} + all_keys = set(d1.keys()).union(set(d2.keys())) + + for key in all_keys: + val1 = d1.get(key) + val2 = d2.get(key) + + if isinstance(val1, dict) and isinstance(val2, dict): + # Recur for nested dictionaries + nested_diff = filter_unchanged_keys(val1, val2) + if nested_diff: + filtered[key] = nested_diff + elif val1 != val2: + # Value changed or key added/removed + filtered[key] = {"old": val1, "new": val2} + + return filtered if filtered else None + + +def dict_diff_filtered(dict1: dict, dict2: dict): + """Return the differences between two dictionaries with nested filtering of unchanged keys.""" + diff = {} + + # Check keys present in either dict + all_keys = set(dict1.keys()).union(set(dict2.keys())) + + for key in all_keys: + if key in dict1 and key in dict2: + if isinstance(dict1[key], dict) and isinstance(dict2[key], dict): + # Recur for nested dictionaries and get filtered diff + nested_diff = filter_unchanged_keys(dict1[key], dict2[key]) + if nested_diff: + diff[key] = nested_diff + elif dict1[key] != dict2[key]: + diff[key] = {"old": dict1[key], "new": dict2[key]} + elif key in dict1: + diff[key] = {"old": dict1[key], "new": None} + else: + diff[key] = {"old": None, "new": dict2[key]} + + return diff + + +def gather_dict_diff(dict1: dict, dict2: dict) -> str: + """Gather the differences between two dictionaries and return them as a formatted string.""" + diff = filter_unchanged_keys(dict1, dict2) + lines = [] + + if not diff: + return "✅ No differences found." + + def process_diff(diff, indent=0): + indentation = " " * indent + for key, value in diff.items(): + if isinstance(value, dict) and "old" in value and "new" in value: + if value["old"] is None: + lines.append(f"{indentation}🔹 Added: {key}: {value['new']}") + elif value["new"] is None: + lines.append(f"{indentation}🔻 Removed: {key}: {value['old']}") + else: + lines.append(f"{indentation}🔄 Modified: {key}:") + lines.append(f"{indentation} - Old: {value['old']}") + lines.append(f"{indentation} - New: {value['new']}") + elif isinstance(value, dict): + lines.append(f"{indentation}🔄 Modified: {key}:") + process_diff(value, indent + 1) + + process_diff(diff) + return "\n".join(lines) + + def four_vec( collections: str | Iterable[str], columns: str | Iterable[str] | None = None, @@ -333,7 +410,9 @@ def inner(config, *args, **kwargs): def timeit(func): - """ Simple wrapper to measure execution time of a function """ + """ + Simple wrapper to measure execution time of a function. 
+ """ @wraps(func) def timeit_wrapper(*args, **kwargs): start_time = time.perf_counter() @@ -347,16 +426,29 @@ def timeit_wrapper(*args, **kwargs): def timeit_multiple(func): """ Wrapper to measure the number of execution calls and the added execution time of a function """ + log_method = "info" + log_func = getattr(_logger, log_method) + @wraps(func) def timeit_wrapper(*args, **kwargs): func.total_calls = getattr(func, "total_calls", 0) + 1 + _repr = func.__name__ + if len(args) >= 1 and hasattr(args[0], "__name__"): + _repr = f"{args[0].__name__}.{_repr}" + + if len(args) >= 3 and isinstance(args[2], dict): + for param in ("branch", "dataset"): + if param in args[2]: + _repr = f"{_repr} ({param} {args[2][param]})" + start_time = time.perf_counter() result = func(*args, **kwargs) end_time = time.perf_counter() total_time = end_time - start_time func.total_time = getattr(func, "total_time", 0) + total_time - _logger.info(f"{func.__name__} has been run {func.total_calls} times ({round_sig(func.total_time)} seconds)") + log_func(f"{_repr} has been run {func.total_calls} times ({round_sig(func.total_time)} seconds)") return result + return timeit_wrapper diff --git a/law.cfg b/law.cfg index 75d32b48..b57cbda9 100644 --- a/law.cfg +++ b/law.cfg @@ -8,7 +8,7 @@ inherit: $CF_BASE/law.cfg columnflow.tasks.cms.external columnflow.tasks.cms.inference -hbw.tasks.{inspection,ml,inference,postfit_plots,plotting,wrapper,union,optimization,corrections} +hbw.tasks.{inspection,campaigns,ml,inference,postfit_plots,plotting,wrapper,union,optimization,corrections} From 87dd19c8a371cb6c22dd53fa312853ce07e18a72 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 10 Dec 2024 11:52:46 +0100 Subject: [PATCH 06/28] cleanup and tests --- hbw/analysis/create_analysis.py | 3 +- hbw/scripts/test_config.py | 1 - hbw/tasks/campaigns.py | 10 +-- hbw/util.py | 24 +++---- tests/test_util.py | 122 +++++++++++++++++++++++++++++++- 5 files changed, 138 insertions(+), 22 deletions(-) diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index d2a49c1d..382bea48 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -111,8 +111,7 @@ def analysis_factory(configs: od.UniqueObjectIndex): ) else: logger.warning( - f"Campaign used for {config_name} has been changed since last initialization." 
- "Difference: \n", + f"Campaign used for {config_name} is being reinitialized: \n", ) cpn_task.run() diff --git a/hbw/scripts/test_config.py b/hbw/scripts/test_config.py index 0a40ff1e..a5763e97 100644 --- a/hbw/scripts/test_config.py +++ b/hbw/scripts/test_config.py @@ -9,7 +9,6 @@ default_analysis = law.config.get_expanded("analysis", "default_analysis") default_config = law.config.get_expanded("analysis", "default_config") -default_config = "c22uhhpost" analysis_inst = ana = AnalysisTask.get_analysis_inst(default_analysis) config_inst = cfg = ana.get_config(default_config) diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index c7132cb9..cccb0f14 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -146,9 +146,9 @@ def run(self): output = self.output() # cross check if the dataset summary did change - backup_dataset_summary = self.target("backup_dataset_summary.yaml") - if backup_dataset_summary.exists(): - backup_dataset_summary = backup_dataset_summary.load(formatter="yaml") + backup_target = self.target("backup_dataset_summary.yaml") + if backup_target.exists(): + backup_dataset_summary = backup_target.load(formatter="yaml") if backup_dataset_summary != self.dataset_summary: from hbw.util import gather_dict_diff logger.warning( @@ -157,7 +157,7 @@ def run(self): ) if self.recreate_backup_summary: logger.warning("Recreating backup dataset summary") - backup_dataset_summary.dump(self.dataset_summary, formatter="yaml") + backup_target.dump(self.dataset_summary, formatter="yaml") else: logger.warning( "Run the following command to recreate the backup dataset summary:\n" @@ -165,7 +165,7 @@ def run(self): ) else: logger.warning("No backup dataset summary found, creating one now") - backup_dataset_summary.dump(self.dataset_summary, formatter="yaml") + backup_target.dump(self.dataset_summary, formatter="yaml") output["dataset_summary"].dump(self.dataset_summary, formatter="yaml") output["campaign_summary"].dump(self.campaign_summary, formatter="yaml") diff --git a/hbw/util.py b/hbw/util.py index 36e6c4dd..bdb998dc 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -280,33 +280,33 @@ def filter_unchanged_keys(d1: dict, d2: dict): return filtered if filtered else None -def dict_diff_filtered(dict1: dict, dict2: dict): +def dict_diff_filtered(old_dict: dict, new_dict: dict): """Return the differences between two dictionaries with nested filtering of unchanged keys.""" diff = {} # Check keys present in either dict - all_keys = set(dict1.keys()).union(set(dict2.keys())) + all_keys = set(old_dict.keys()).union(set(new_dict.keys())) for key in all_keys: - if key in dict1 and key in dict2: - if isinstance(dict1[key], dict) and isinstance(dict2[key], dict): + if key in old_dict and key in new_dict: + if isinstance(old_dict[key], dict) and isinstance(new_dict[key], dict): # Recur for nested dictionaries and get filtered diff - nested_diff = filter_unchanged_keys(dict1[key], dict2[key]) + nested_diff = filter_unchanged_keys(old_dict[key], new_dict[key]) if nested_diff: diff[key] = nested_diff - elif dict1[key] != dict2[key]: - diff[key] = {"old": dict1[key], "new": dict2[key]} - elif key in dict1: - diff[key] = {"old": dict1[key], "new": None} + elif old_dict[key] != new_dict[key]: + diff[key] = {"old": old_dict[key], "new": new_dict[key]} + elif key in old_dict: + diff[key] = {"old": old_dict[key], "new": None} else: - diff[key] = {"old": None, "new": dict2[key]} + diff[key] = {"old": None, "new": new_dict[key]} return diff -def gather_dict_diff(dict1: dict, dict2: dict) -> str: 
+def gather_dict_diff(old_dict: dict, new_dict: dict) -> str: """Gather the differences between two dictionaries and return them as a formatted string.""" - diff = filter_unchanged_keys(dict1, dict2) + diff = filter_unchanged_keys(old_dict, new_dict) lines = [] if not diff: diff --git a/tests/test_util.py b/tests/test_util.py index 537a42bb..85defa9c 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -8,7 +8,7 @@ from columnflow.util import maybe_import -from hbw.util import build_param_product, round_sig, dict_diff, four_vec, call_once_on_config +from hbw.util import build_param_product, round_sig, dict_diff, four_vec, call_once_on_config, gather_dict_diff import order as od @@ -16,7 +16,121 @@ ak = maybe_import("awkward") -class HbwUtilTest(unittest.TestCase): +class TestDictDiff(unittest.TestCase): + def test_no_difference(self): + dict1 = {"name": "Alice", "age": 25} + dict2 = {"name": "Alice", "age": 25} + result = gather_dict_diff(dict1, dict2) + self.assertEqual(result, "✅ No differences found.") + + def test_simple_modification(self): + dict1 = {"name": "Alice", "age": 25} + dict2 = {"name": "Alice", "age": 26} + result = gather_dict_diff(dict1, dict2) + expected_output = ( + "🔄 Modified: age:\n" + " - Old: 25\n" + " - New: 26" + ) + self.assertEqual(result, expected_output) + + def test_addition(self): + dict1 = {"name": "Alice"} + dict2 = {"name": "Alice", "hobby": "cycling"} + result = gather_dict_diff(dict1, dict2) + expected_output = "🔹 Added: hobby: cycling" + self.assertEqual(result, expected_output) + + def test_removal(self): + dict1 = {"name": "Alice", "hobby": "cycling"} + dict2 = {"name": "Alice"} + result = gather_dict_diff(dict1, dict2) + expected_output = "🔻 Removed: hobby: cycling" + self.assertEqual(result, expected_output) + + def test_nested_modification(self): + dict1 = { + "name": "Alice", + "skills": { + "python": "intermediate", + "sql": "beginner" + } + } + dict2 = { + "name": "Alice", + "skills": { + "python": "advanced", + "sql": "beginner" + } + } + result = gather_dict_diff(dict1, dict2) + expected_output = ( + "🔄 Modified: skills:\n" + " 🔄 Modified: python:\n" + " - Old: intermediate\n" + " - New: advanced" + ) + self.assertEqual(result, expected_output) + + def test_nested_addition(self): + dict1 = { + "name": "Alice", + "skills": { + "python": "intermediate" + } + } + dict2 = { + "name": "Alice", + "skills": { + "python": "intermediate", + "docker": "beginner" + } + } + result = gather_dict_diff(dict1, dict2) + expected_output = ( + "🔄 Modified: skills:\n" + " 🔹 Added: docker: beginner" + ) + self.assertEqual(result, expected_output) + + def test_complex_diff(self): + dict1 = { + "name": "Alice", + "age": 25, + "skills": { + "python": "intermediate", + "sql": "beginner", + }, + } + dict2 = { + "name": "Alice", + "age": 26, + "skills": { + "python": "advanced", + "sql": "beginner", + "docker": "beginner", + }, + "hobby": "cycling", + } + result = gather_dict_diff(dict1, dict2) + expected_output = ( + "🔄 Modified: age:\n" + " - Old: 25\n" + " - New: 26\n" + "🔄 Modified: skills:\n" + " 🔄 Modified: python:\n" + " - Old: intermediate\n" + " - New: advanced\n" + " 🔹 Added: docker: beginner\n" + "🔹 Added: hobby: cycling" + ) + self.assertEqual(result, expected_output) + + +class HbwUtilTest( + TestDictDiff, + unittest.TestCase, +): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -97,3 +211,7 @@ def some_config_function(config: od.Config) -> str: # on second call, function should not be called -> returns None 
self.assertEqual(some_config_function(self.config_inst), None) + + +if __name__ == "__main__": + unittest.main() From 260bb915f6dc0ba07185e754448318b99911f2d7 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 10 Dec 2024 14:11:27 +0100 Subject: [PATCH 07/28] require existence of campaign before running analysis --- hbw/analysis/create_analysis.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 382bea48..4c7d19fc 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -110,10 +110,11 @@ def analysis_factory(configs: od.UniqueObjectIndex): f"law run {cpn_task.task_family} --config {config_name} --remove-output 0,a,y", ) else: - logger.warning( - f"Campaign used for {config_name} is being reinitialized: \n", + raise ValueError( + f"Campaign used for {config_name} is not yet initialized; to initialize, run: \n", + f"law run {cpn_task.task_family} --config {config_name} --remove-output 0,a,y", ) - cpn_task.run() + # cpn_task.run() hbw_campaign_inst = cpn_task.output()["hbw_campaign_inst"].load(formatter="pickle") return add_config( From 07aa1b627f7131d772845a891d6680f49b4555e6 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 10 Dec 2024 14:12:23 +0100 Subject: [PATCH 08/28] enable usage of uhh campaigns --- hbw/config/datasets.py | 43 ++++++++++++++++++++++++++++++++++++++++++ hbw/config/styling.py | 2 +- hbw/tasks/campaigns.py | 2 ++ law.cfg | 36 ++++++++++++++++++++++++++++++++++- 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index 431a1a32..a9193479 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -397,6 +397,9 @@ def configure_hbw_datasets( limit_dataset_files: int | None = None, add_dataset_extensions: bool = False, ): + # allow usage of UHH campaign + enable_uhh_campaign_usage(config) + for dataset in config.datasets: if add_dataset_extensions: add_dataset_extension_to_nominal(dataset) @@ -541,3 +544,43 @@ def get_dataset_lfns_2017( lfn_base.child(basename, type="f").path for basename in lfn_base.listdir(pattern="*.root") ] + + +def enable_uhh_campaign_usage(cfg: od.Config) -> None: + # custom lfn retrieval method in case the underlying campaign is custom uhh + def get_dataset_lfns_uhh( + dataset_inst: od.Dataset, + shift_inst: od.Shift, + dataset_key: str, + ) -> list[str]: + if "uhh" not in dataset_inst.x("campaign", ""): + # for non-uhh datasets, use default GetDatasetLFNs method + return GetDatasetLFNs.get_dataset_lfns_dasgoclient( + GetDatasetLFNs, dataset_inst=dataset_inst, shift_inst=shift_inst, dataset_key=dataset_key, + ) + cpn_name = dataset_inst.x.campaign + # destructure dataset_key into parts and create the lfn base directory + dataset_id, full_campaign, tier = dataset_key.split("/")[1:] + main_campaign, sub_campaign = full_campaign.split("-", 1) + lfn_base = law.wlcg.WLCGDirectoryTarget( + f"/store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0", + # fs=f"wlcg_fs_{cfg.campaign.x.custom['name']}", + fs=f"wlcg_fs_{cpn_name}", + ) + + # loop though files and interpret paths as lfns + return [ + lfn_base.child(basename, type="f").path + for basename in lfn_base.listdir(pattern="*.root") + ] + + if any("uhh" in cpn_name for cpn_name in cfg.campaign.x("campaigns", [])): + # define the lfn retrieval function + cfg.x.get_dataset_lfns = get_dataset_lfns_uhh + + # define custom remote fs's to look at + cfg.x.get_dataset_lfns_remote_fs 
= lambda dataset_inst: ( + None if "uhh" not in dataset_inst.x("campaign", "") else [ + f"local_fs_{dataset_inst.x.campaign}", + f"wlcg_fs_{dataset_inst.x.campaign}", + ]) diff --git a/hbw/config/styling.py b/hbw/config/styling.py index 318f7db4..9075979f 100644 --- a/hbw/config/styling.py +++ b/hbw/config/styling.py @@ -82,7 +82,7 @@ "dy_m50toinf": color_palette["yellow"], "dy_m10to50": color_palette["brown"], "dy_m4to10": color_palette["darkgrey"], - "ttV": color_palette["brown"], + "ttv": color_palette["brown"], "vv": color_palette["blue"], "other": color_palette["grey"], "hh_ggf_hbb_htt": color_palette["grey"], diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index cccb0f14..8c9b9ca7 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -137,6 +137,8 @@ def get_custom_campaign(self): dataset_inst.x.campaign = campaign_inst.name hbw_campaign_inst.add_dataset(dataset_inst) + hbw_campaign_inst.x.campaigns = list(self.campaigns) + return hbw_campaign_inst from hbw.util import timeit_multiple diff --git a/law.cfg b/law.cfg index b57cbda9..ef9ce8c0 100644 --- a/law.cfg +++ b/law.cfg @@ -73,7 +73,7 @@ check_overlapping_inputs: None [outputs] # list of all used file systems -wlcg_file_systems: wlcg_fs, wlcg_fs_desy, wlcg_fs_cernbox, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector +wlcg_file_systems: wlcg_fs, wlcg_fs_desy, wlcg_fs_cernbox, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector, wlcg_fs_run3_2022_preEE_nano_uhh_v12, wlcg_fs_run3_2022_postEE_nano_uhh_v12 # list of file systems used by columnflow.tasks.external.GetDatasetLFNs.iter_nano_files to # look for the correct fs per nano input file (in that order) @@ -259,6 +259,40 @@ gsiftp_base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/stor base: &::gsiftp_base +[wlcg_fs_run3_2022_preEE_nano_uhh_v12] + +webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 +gsiftp_base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 +xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 +base: &::xrootd_base +use_cache: $CF_WLCG_USE_CACHE +cache_root: $CF_WLCG_CACHE_ROOT +cache_cleanup: $CF_WLCG_CACHE_CLEANUP +cache_max_size: 15GB +cache_global_lock: True + +[local_fs_run3_2022_preEE_nano_uhh_v12] + +base: file:///pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 + + +[wlcg_fs_run3_2022_postEE_nano_uhh_v12] + +webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1 +gsiftp_base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1 +xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1 +base: &::xrootd_base +use_cache: $CF_WLCG_USE_CACHE +cache_root: $CF_WLCG_CACHE_ROOT +cache_cleanup: $CF_WLCG_CACHE_CLEANUP +cache_max_size: 15GB +cache_global_lock: True + +[local_fs_run3_2022_postEE_nano_uhh_v12] + +base: file:///pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1 + + [luigi_resources] From 580e9b4408ee79ffb3289cbccfc5845d2560aaa5 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 
12 Dec 2024 09:27:18 +0100 Subject: [PATCH 09/28] allow string version for CSPs --- hbw/columnflow_patches.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbw/columnflow_patches.py b/hbw/columnflow_patches.py index 1724d0ee..06ba7c5d 100644 --- a/hbw/columnflow_patches.py +++ b/hbw/columnflow_patches.py @@ -86,8 +86,8 @@ def patch_csp_versioning(): def TaskArrayFunction_str(self): version = self.version() if callable(getattr(self, "version", None)) else getattr(self, "version", None) - if version and not isinstance(version, int): - raise Exception(f"version must be an integer, but is {version}") + if version and not isinstance(version, (int, str)): + raise Exception(f"version must be an integer or string, but is {version} ({type(version)})") version_str = f"V{version}" if version is not None else "" return f"{self.cls_name}{version_str}" From 65b26bd9f59857a4d7cdf52401d4c0d745a0db93 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 12 Dec 2024 10:44:21 +0100 Subject: [PATCH 10/28] cleanup in variables (start count with 0) --- hbw/config/styling.py | 6 +- hbw/config/variables.py | 196 ++++++++++++++++++++++++---------------- 2 files changed, 120 insertions(+), 82 deletions(-) diff --git a/hbw/config/styling.py b/hbw/config/styling.py index 9075979f..4f248c91 100644 --- a/hbw/config/styling.py +++ b/hbw/config/styling.py @@ -82,7 +82,7 @@ "dy_m50toinf": color_palette["yellow"], "dy_m10to50": color_palette["brown"], "dy_m4to10": color_palette["darkgrey"], - "ttv": color_palette["brown"], + "ttv": color_palette["turqoise"], "vv": color_palette["blue"], "other": color_palette["grey"], "hh_ggf_hbb_htt": color_palette["grey"], @@ -292,10 +292,10 @@ def quick_addvar(config: od.Config, obj: str, i: int, var: str): object (starting at 1) and `var` is the variable of interest; example: cf_loosejet1_pt """ config.add_variable( - name=name.format(obj=obj, i=i + 1, var=var).lower(), + name=name.format(obj=obj, i=i, var=var).lower(), expression=expr.format(obj=obj, i=i, var=var), null_value=EMPTY_FLOAT, binning=default_var_binning[var], unit=default_var_unit.get(var, "1"), - x_title=x_title_base.format(obj=obj, i=i + 1) + default_var_title_format.get(var, var), + x_title=x_title_base.format(obj=obj, i=i) + default_var_title_format.get(var, var), ) diff --git a/hbw/config/variables.py b/hbw/config/variables.py index 454e7888..34ea1dca 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -119,11 +119,11 @@ def add_feature_variables(config: od.Config) -> None: # FatJet features for i in range(2): config.add_variable( - name=f"fatjet{i+1}_tau21", + name=f"fatjet{i}_tau21", expression=f"FatJet.tau21[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=r"FatJet %i $\tau_{21}$" % (i + 1), + x_title=r"FatJet %i $\tau_{21}$" % i, ) @@ -375,31 +375,33 @@ def add_variables(config: od.Config) -> None: x_title="Number of jets", discrete_x=True, ) - deepjet_wps = config.x.btag_working_points.deepjet - config.add_variable( - name="n_deepjet_loose", - expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, - binning=(7, -0.5, 6.5), - x_title="Number of deepjets (loose WP)", - discrete_x=True, - ) - config.add_variable( - name="n_deepjet_medium", - expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, - binning=(7, -0.5, 6.5), - x_title="Number of deepjets (medium WP)", - discrete_x=True, - ) - 
config.add_variable( - name="n_deepjet_tight", - expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, - binning=(7, -0.5, 6.5), - x_title="Number of deepjets (tight WP)", - discrete_x=True, - ) + + if config.x.run == 2: + deepjet_wps = config.x.btag_working_points.deepjet + config.add_variable( + name="n_deepjet_loose", + expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1), + aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + binning=(7, -0.5, 6.5), + x_title="Number of deepjets (loose WP)", + discrete_x=True, + ) + config.add_variable( + name="n_deepjet_medium", + expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1), + aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + binning=(7, -0.5, 6.5), + x_title="Number of deepjets (medium WP)", + discrete_x=True, + ) + config.add_variable( + name="n_deepjet_tight", + expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1), + aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + binning=(7, -0.5, 6.5), + x_title="Number of deepjets (tight WP)", + discrete_x=True, + ) if config.x.run == 3: particlenet_wps = config.x.btag_working_points.particlenet config.add_variable( @@ -508,169 +510,205 @@ def add_variables(config: od.Config) -> None: # Jets (4 pt-leading jets) for i in range(4): config.add_variable( - name=f"jet{i+1}_pt", + name=f"jet{i}_pt", expression=f"Jet.pt[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0., 400.), unit="GeV", - x_title=r"Jet %i $p_{T}$" % (i + 1), + x_title=r"Jet %i $p_{T}$" % i, ) config.add_variable( - name=f"jet{i+1}_eta", + name=f"jet{i}_eta", expression=f"Jet.eta[:,{i}]", null_value=EMPTY_FLOAT, binning=(50, -2.5, 2.5), - x_title=r"Jet %i $\eta$" % (i + 1), + x_title=r"Jet %i $\eta$" % i, ) config.add_variable( - name=f"jet{i+1}_phi", + name=f"jet{i}_phi", expression=f"Jet.phi[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, -3.2, 3.2), - x_title=r"Jet %i $\phi$" % (i + 1), + x_title=r"Jet %i $\phi$" % i, ) config.add_variable( - name=f"jet{i+1}_mass", + name=f"jet{i}_mass", expression=f"Jet.mass[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 200), unit="GeV", - x_title=r"Jet %i mass" % (i + 1), - ) - # config.add_variable( - # name=f"jet{i+1}_btagDeepB", - # expression=f"Jet.btagDeepB[:,{i}]", - # null_value=EMPTY_FLOAT, - # binning=(40, 0, 1), - # x_title=r"Jet %i DeepCSV b+bb tag" % (i + 1), - # ) - config.add_variable( - name=f"jet{i+1}_btagDeepFlavB", - expression=f"Jet.btagDeepFlavB[:,{i}]", - null_value=EMPTY_FLOAT, - binning=(40, 0, 1), - x_title=r"Jet %i DeepFlavour b+bb+lepb tag" % (i + 1), + x_title=r"Jet %i mass" % i, ) + if config.x.run == 2: + config.add_variable( + name=f"jet{i}_btagDeepFlavB", + expression=f"Jet.btagDeepFlavB[:,{i}]", + null_value=EMPTY_FLOAT, + binning=(40, 0, 1), + x_title=r"Jet %i DeepFlavour b+bb+lepb tag" % i, + ) if config.x.run == 3: config.add_variable( - name=f"jet{i+1}_btagPNetB", + name=f"jet{i}_btagPNetB", expression=f"Jet.btagPNetB[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=r"Jet %i ParticleNet score" % (i + 1), + x_title=r"Jet %i ParticleNet score" % i, ) # Bjets (2 b-score leading jets) and Lightjets (2 non-b pt-leading jets) for i in range(2): for obj in ["Bjet", "Lightjet"]: config.add_variable( - name=f"{obj}{i+1}_pt".lower(), + name=f"{obj}{i}_pt".lower(), expression=f"{obj}.pt[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0., 300.), unit="GeV", - x_title=obj + r" %i 
$p_{T}$" % (i + 1), + x_title=obj + r" %i $p_{T}$" % i, ) config.add_variable( - name=f"{obj}{i+1}_eta".lower(), + name=f"{obj}{i}_eta".lower(), expression=f"{obj}.eta[:,{i}]", null_value=EMPTY_FLOAT, binning=(50, -2.5, 2.5), - x_title=obj + r" %i $\eta$" % (i + 1), + x_title=obj + r" %i $\eta$" % i, ) config.add_variable( - name=f"{obj}{i+1}_phi".lower(), + name=f"{obj}{i}_phi".lower(), expression=f"{obj}.phi[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, -3.2, 3.2), - x_title=obj + r" %i $\phi$" % (i + 1), + x_title=obj + r" %i $\phi$" % i, ) config.add_variable( - name=f"{obj}{i+1}_mass".lower(), + name=f"{obj}{i}_mass".lower(), expression=f"{obj}.mass[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 200), - x_title=obj + r" %i mass" % (i + 1), + x_title=obj + r" %i mass" % i, ) if config.x.run == 3: config.add_variable( - name=f"{obj}{i+1}_btagPNetB", + name=f"{obj}{i}_btagPNetB", expression=f"{obj}.btagPNetB[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=obj + r" %i ParticleNet score" % (i + 1), + x_title=obj + r" %i ParticleNet score" % i, ) # FatJets (2 pt-leading fatjets) for i in range(2): config.add_variable( - name=f"fatjet{i+1}_pt", + name=f"fatjet{i}_pt", expression=f"FatJet.pt[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 170., 500.), unit="GeV", - x_title=r"FatJet %i $p_{T}$" % (i + 1), + x_title=r"FatJet %i $p_{T}$" % i, ) config.add_variable( - name=f"fatjet{i+1}_eta", + name=f"fatjet{i}_eta", expression=f"FatJet.eta[:,{i}]", null_value=EMPTY_FLOAT, binning=(50, -2.5, 2.5), - x_title=r"FatJet %i $\eta$" % (i + 1), + x_title=r"FatJet %i $\eta$" % i, ) config.add_variable( - name=f"fatjet{i+1}_phi", + name=f"fatjet{i}_phi", expression=f"FatJet.phi[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, -3.2, 3.2), - x_title=r"FatJet %i $\phi$" % (i + 1), + x_title=r"FatJet %i $\phi$" % i, ) config.add_variable( - name=f"fatjet{i+1}_mass", + name=f"fatjet{i}_mass", expression=f"FatJet.mass[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 250), unit="GeV", - x_title=r"FatJet %i mass" % (i + 1), + x_title=r"FatJet %i mass" % i, ) config.add_variable( - name=f"fatjet{i+1}_msoftdrop", + name=f"fatjet{i}_msoftdrop", expression=f"FatJet.msoftdrop[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 250), unit="GeV", - x_title=r"FatJet %i softdrop mass" % (i + 1), + x_title=r"FatJet %i softdrop mass" % i, ) config.add_variable( - name=f"fatjet{i+1}_tau1", + name=f"fatjet{i}_tau1", expression=f"FatJet.tau1[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=r"FatJet %i $\tau_1$" % (i + 1), + x_title=r"FatJet %i $\tau_1$" % i, ) config.add_variable( - name=f"fatjet{i+1}_tau2", + name=f"fatjet{i}_tau2", expression=f"FatJet.tau2[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=r"FatJet %i $\tau_2$" % (i + 1), + x_title=r"FatJet %i $\tau_2$" % i, ) config.add_variable( - name=f"fatjet{i+1}_btagHbb", + name=f"fatjet{i}_btagHbb", expression=f"FatJet.btagHbb[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=r"FatJet %i btagHbb" % (i + 1), + x_title=r"FatJet %i btagHbb" % i, ) config.add_variable( - name=f"fatjet{i+1}_deepTagMD_HbbvsQCD", + name=f"fatjet{i}_deepTagMD_HbbvsQCD", expression=f"FatJet.deepTagMD_HbbvsQCD[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), - x_title=r"FatJet %i deepTagMD_HbbvsQCD " % (i + 1), + x_title=r"FatJet %i deepTagMD_HbbvsQCD " % i, ) # Leptons + for i in range(2): + config.add_variable( + name=f"lepton{i}_pt", + expression="Lepton[:, i].pt", + aux=dict( + inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, + ), + 
binning=(40, 0., 400.), + unit="GeV", + null_value=EMPTY_FLOAT, + ) + config.add_variable( + name=f"lepton{i}_eta", + expression="Lepton[:, i].eta", + aux=dict( + inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, + ), + binning=(40, -3.2, 3.2), + unit="GeV", + null_value=EMPTY_FLOAT, + ) + config.add_variable( + name=f"lepton{i}_phi", + expression="Lepton[:, i].phi", + aux=dict( + inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, + ), + binning=(50, -2.5, 2.5), + unit="GeV", + null_value=EMPTY_FLOAT, + ) + config.add_variable( + name=f"lepton{i}_mass", + expression="Lepton[:, i].mass", + aux=dict( + inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, + ), + binning=(40, 0., 400.), + unit="GeV", + null_value=EMPTY_FLOAT, + ) + for obj in ["Electron", "Muon"]: config.add_variable( name=f"{obj.lower()}_pt", From f15ccd38fcc093edc19ae4a68962834bab4ea7a7 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 12 Dec 2024 12:39:12 +0100 Subject: [PATCH 11/28] update variable and process groups --- hbw/config/defaults_and_groups.py | 19 ++++++++++++++----- hbw/util.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index d3351dc9..7852e34e 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -4,6 +4,7 @@ from columnflow.inference import InferenceModel from columnflow.tasks.framework.base import RESOLVE_DEFAULT +from hbw.util import bracket_expansion def default_calibrator(container): @@ -134,9 +135,10 @@ def set_config_defaults_and_groups(config_inst): "much": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "qcd", "st", "dy", "vv", "w_lnu", "h"], # noqa: E501 "ech": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "qcd", "st", "dy", "vv", "w_lnu", "h"], # noqa: E501 "dl": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "dy", "st", "vv", "w_lnu", "h"], # noqa: E501 - "dl1": [default_signal_process, "tt", "dy", "st", "vv", "w_lnu", "h"], - "dl2": [default_signal_process, "tt", "dy_m4to10", "dy_m10to50", "dy_m50toinf", "st", "vv", "w_lnu", "h"], # noqa: E501 - "dlbkg": ["tt", "dy_m4to10", "dy_m10to50", "dy_m50toinf", "st", "vv", "w_lnu", "h"], + "dl1": [default_signal_process, "tt", "dy", "st", "ttv", "vv", "w_lnu", "h"], + "dl2": [default_signal_process, "tt", "dy_m4to10", "dy_m10to50", "dy_m50toinf", "st", "ttv", "vv", "w_lnu", "h"], # noqa: E501 + "dl3": [default_signal_process, "tt", "dy_m10to50", "dy_m50toinf", "st", "ttv", "vv", "w_lnu", "h"], # noqa: E501 + "dlbkg": ["tt", "dy_m4to10", "dy_m10to50", "dy_m50toinf", "st", "ttv", "vv", "w_lnu", "h"], "dlmajor": [default_signal_process, "tt", "dy", "st"], "2much": [default_signal_process, "tt", "dy_m4to10", "dy_m10to50", "dy_m50toinf", "st", "vv", "w_lnu", "h"], "2ech": [default_signal_process, "tt", "dy_m4to10", "dy_m10to50", "dy_m50toinf", "st", "vv", "w_lnu", "h"], @@ -186,7 +188,7 @@ def set_config_defaults_and_groups(config_inst): remove_generator = lambda x: x.replace("_powheg", "").replace("_madgraph", "").replace("_amcatnlo", "").replace("_pythia8", "").replace("4f_", "") # noqa: E501 config_inst.x.process_groups[f"datasets_{proc}"] = [remove_generator(dataset) for dataset in datasets] - for group in ("dl2", "dl1", "dl", "much", "2much", "ech", "2ech", "emuch"): + for group in ("dl3", "dl2", "dl1", "dl", "much", "2much", "ech", "2ech", "emuch"): # thanks to double counting removal, we can (and should) now use all datasets in each channel 
config_inst.x.process_groups[f"d{group}"] = ["data"] + config_inst.x.process_groups[group]
 
@@ -299,7 +301,14 @@ def set_config_defaults_and_groups(config_inst):
         "sl": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht"],
         "sl_resolved": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht"],
         "sl_boosted": ["n_*", "electron_*", "muon_*", "met_*", "fatjet_*"],
-        "dl": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht", "lt", "mll", "ptll"],
+        "dl": bracket_expansion([
+            "n_{jet,bjet,electron,muon,fatjet,hbbjet}",
+            "lepton{0,1}_{pt,eta,phi}",
+            "met_{pt,phi}",
+            "jet{0,1,2,3}_{pt,eta,phi,mass,btagPNetB}",
+            "bjet{0,1}_{pt,eta,phi,mass,btagPNetB}",
+            "ht", "lt", "mll", "ptll",
+        ]),
         "dl_resolved": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht", "lt", "mll", "ptll"],
         "dl_boosted": ["n_*", "electron_*", "muon_*", "met_*", "fatjet_*", "lt", "mll", "ptll"],
         "default": ["n_jet", "n_muon", "n_electron", "ht", "m_bb", "deltaR_bb", "jet1_pt"],  # n_deepjet, ....

diff --git a/hbw/util.py b/hbw/util.py
index bdb998dc..990f4da9 100644
--- a/hbw/util.py
+++ b/hbw/util.py
@@ -6,6 +6,8 @@
 
 from __future__ import annotations
 
+import re
+import itertools
 import time
 from typing import Hashable, Iterable, Callable
 from functools import wraps, reduce
@@ -378,6 +380,34 @@ def four_vec(
     return outp
 
 
+def bracket_expansion(inputs: list):
+    """
+    Expands a list of strings with bracket notation into all possible combinations.
+
+    Example:
+        bracket_expansion(["{Jet,Muon}.{pt,eta}", "{Electron,Photon}.{phi}"]) -->
+        ["Electron.phi", "Jet.eta", "Jet.pt", "Muon.eta", "Muon.pt", "Photon.phi"]
+
+    NOTE: similar implementation might be somewhere in columnflow.
+    """
+    pattern = re.compile(r'\{([^{}]+)\}')
+    outp = set()
+
+    for inp in inputs:
+        # Find all bracketed groups and extract options by splitting on ','
+        matches = pattern.findall(inp)
+        options = [match.split(',') for match in matches]
+
+        # Replace each bracketed group with a placeholder '{}'
+        template = pattern.sub('{}', inp)
+
+        # Generate all possible combinations and add to the output set
+        combinations = itertools.product(*options)
+        outp.update(template.format(*combo) for combo in combinations)
+
+    return sorted(outp)
+
+
 def has_four_vec(
     events: ak.Array,
     collection_name: str,

From 04ef6ad007eb885bd9c311ef33357714b959ad32 Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Thu, 12 Dec 2024 12:40:34 +0100
Subject: [PATCH 12/28] cleanup in ml setup function

---
 hbw/ml/base.py       | 30 ++++++++++++++++++++--------
 hbw/ml/derived/dl.py | 33 +++------------------------------
 2 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/hbw/ml/base.py b/hbw/ml/base.py
index c1774524..90dfe5a6 100644
--- a/hbw/ml/base.py
+++ b/hbw/ml/base.py
@@ -21,6 +21,7 @@
 from hbw.util import log_memory
 from hbw.ml.data_loader import MLDatasetLoader, MLProcessData, input_features_sanity_checks
+from hbw.config.processes import create_combined_proc_forML
 
 from hbw.tasks.ml import MLPreTraining
@@ -186,24 +187,37 @@ def parameters_repr(self):
         self._parameters_repr = parameters_repr
         return self._parameters_repr
 
-    def setup(self):
+    def setup(self) -> None:
         """
         function that is run as part of the setup phase.
Most likely overwritten by subclasses """ - logger.info( - f"Setting up MLModel {self.cls_name} (parameter hash: {self.parameters_repr})" + logger.debug( + f"Setting up MLModel {self.cls_name} (parameter hash: {self.parameters_repr}), " f"parameters: \n{self.parameters}", ) - # dynamically add variables for the quantities produced by this model - # NOTE: since these variables are only used in ConfigTasks, - # we do not need to add these variables to all configs + # dynamically add processes and variables for the quantities produced by this model + # NOTE: this function might not be called for all configs when the requested configs + # between MLTraining and the requested task are different + for proc in self.combine_processes: + if proc not in self.config_inst.processes: + proc_name = str(proc) + proc_dict = DotDict(self.combine_processes[proc]) + create_combined_proc_forML(self.config_inst, proc_name, proc_dict) + for proc in self.processes: for config_inst in self.config_insts: if f"mlscore.{proc}" not in config_inst.variables: config_inst.add_variable( name=f"mlscore.{proc}", + expression=f"mlscore.{proc}", null_value=-1, binning=(1000, 0., 1.), x_title=f"DNN output score {config_inst.get_process(proc).x.ml_label}", - aux={"rebin": 25}, # automatically rebin to 40 bins for plotting tasks + aux={ + "rebin": 25, + "rebin_config": { + "processes": [proc], + "n_bins": 4, + } + }, # automatically rebin to 40 bins for plotting tasks ) def preparation_producer(self: MLModel, analysis_inst: od.Analysis): @@ -295,6 +309,7 @@ def output(self, task: law.Task) -> dict[str, law.FileSystemTarget]: outp = { "mlmodel": target, "plots": target.child("plots", type="d", optional=True), + # "dummy": target.child("dummy", type="d", optional=True), "checkpoint": target.child("checkpoint", type="d", optional=True), } @@ -303,7 +318,6 @@ def output(self, task: law.Task) -> dict[str, law.FileSystemTarget]: target.child(fname, type="f") for fname in ("saved_model.pb", "keras_metadata.pb", "fingerprint.pb", "parameters.yaml", "input_features.pkl") ] - return outp def open_model(self, target: law.LocalDirectoryTarget) -> dict[str, Any]: diff --git a/hbw/ml/derived/dl.py b/hbw/ml/derived/dl.py index 59b4a631..05791fe7 100644 --- a/hbw/ml/derived/dl.py +++ b/hbw/ml/derived/dl.py @@ -10,12 +10,11 @@ import law -from columnflow.util import maybe_import, DotDict +from columnflow.util import maybe_import from hbw.ml.base import MLClassifierBase from hbw.ml.mixins import DenseModelMixin, ModelFitMixin -from hbw.config.processes import create_combined_proc_forML np = maybe_import("numpy") ak = maybe_import("awkward") @@ -135,34 +134,8 @@ def __init__( def cast_ml_param_values(self): super().cast_ml_param_values() - def setup(self): - # dynamically add variables for the quantities produced by this model - # NOTE: since these variables are only used in ConfigTasks, - # we do not need to add these variables to all configs - for proc in self.combine_processes: - if proc not in self.config_inst.processes: - proc_name = str(proc) - proc_dict = DotDict(self.combine_processes[proc]) - create_combined_proc_forML(self.config_inst, proc_name, proc_dict) - - for proc in self.processes: - for config_inst in self.config_insts: - if f"mlscore.{proc}" not in config_inst.variables: - config_inst.add_variable( - name=f"mlscore.{proc}", - expression=f"mlscore.{proc}", - null_value=-1, - binning=(1000, 0., 1.), - x_title=f"DNN output score {config_inst.get_process(proc).x('ml_label', '')}", - aux={"rebin": 40}, - ) - 
config_inst.add_variable( - name=f"mlscore40.{proc}", - expression=f"mlscore.{proc}", - null_value=-1, - binning=(40, 0., 1.), - x_title=f"DNN output score {config_inst.get_process(proc).x('ml_label', '')}", - ) + def setup(self) -> None: + super().setup() # From 0bceba32c32866b4ef9de04b8085dc272eaff623 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 13 Dec 2024 09:09:59 +0100 Subject: [PATCH 13/28] fix variables --- hbw/config/variables.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hbw/config/variables.py b/hbw/config/variables.py index 34ea1dca..60d5071e 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -670,7 +670,7 @@ def add_variables(config: od.Config) -> None: for i in range(2): config.add_variable( name=f"lepton{i}_pt", - expression="Lepton[:, i].pt", + expression=f"Lepton[:, {i}].pt", aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), @@ -680,7 +680,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name=f"lepton{i}_eta", - expression="Lepton[:, i].eta", + expression=f"Lepton[:, {i}].eta", aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), @@ -690,7 +690,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name=f"lepton{i}_phi", - expression="Lepton[:, i].phi", + expression=f"Lepton[:, {i}].phi", aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), @@ -700,7 +700,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name=f"lepton{i}_mass", - expression="Lepton[:, i].mass", + expression=f"Lepton[:, {i}].mass", aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), From 6ddf5515f6c28bc102e43d996628650e1db464b4 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 13 Dec 2024 09:10:36 +0100 Subject: [PATCH 14/28] extend timeit wrapper --- hbw/util.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/hbw/util.py b/hbw/util.py index 990f4da9..93b00cb7 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -463,13 +463,56 @@ def timeit_multiple(func): def timeit_wrapper(*args, **kwargs): func.total_calls = getattr(func, "total_calls", 0) + 1 _repr = func.__name__ + if len(args) >= 1 and hasattr(args[0], "__name__"): + # some classmethod _repr = f"{args[0].__name__}.{_repr}" - if len(args) >= 3 and isinstance(args[2], dict): + if len(args) >= 2 and isinstance(args[1], dict): + params = args[1] + elif len(args) >= 3 and isinstance(args[2], dict): + params = args[2] + else: + params = {} + for param in ("branch", "dataset"): - if param in args[2]: - _repr = f"{_repr} ({param} {args[2][param]})" + if param in params: + _repr = f"{_repr} ({param} {params[param]})" + + elif len(args) >= 1 and hasattr(args[0], "cls_name"): + # probably a CSP function + inst = args[0] + params = {} + _repr = f"{inst.cls_name}.{_repr}" + if hasattr(inst, "config_inst"): + _repr = f"{_repr} ({inst.config_inst.name})" + if hasattr(inst, "dataset_inst"): + _repr = f"{_repr} ({inst.dataset_inst.name})" + if hasattr(inst, "shift_inst"): + _repr = f"{_repr} ({inst.shift_inst.name})" + + start_time = time.perf_counter() + result = func(*args, **kwargs) + end_time = time.perf_counter() + total_time = end_time - start_time + func.total_time = getattr(func, "total_time", 0) + total_time + log_func(f"{_repr} has been run {func.total_calls} times ({round_sig(func.total_time)} seconds)") + return result + + return timeit_wrapper + + +def timeit_multiple_plain(func): + """ Wrapper to measure the number of execution calls 
and the added execution time of a function """
+    log_method = "info"
+    log_func = getattr(_logger, log_method)
+
+    @wraps(func)
+    def timeit_wrapper(*args, **kwargs):
+        func.total_calls = getattr(func, "total_calls", 0) + 1
+        _repr = func.__name__
+        if len(args) >= 1 and hasattr(args[0], "__name__"):
+            _repr = f"{args[0].__name__}.{_repr}"
 
         start_time = time.perf_counter()
         result = func(*args, **kwargs)

From 0feb3c6bbf9eb64d5faaf80adc0fd47f78648b6d Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Fri, 13 Dec 2024 09:10:55 +0100
Subject: [PATCH 15/28] add category group

---
 hbw/config/defaults_and_groups.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py
index 7852e34e..d2d8897c 100644
--- a/hbw/config/defaults_and_groups.py
+++ b/hbw/config/defaults_and_groups.py
@@ -225,6 +225,7 @@ def set_config_defaults_and_groups(config_inst):
         "sl_much_boosted": ["sr__1mu__boosted"],
         "sl_ech_boosted": ["sr__1e__boosted"],
         "dl": ["sr", "dycr", "ttcr", "sr__1b", "sr__2b", "dycr__1b", "dycr__2b", "ttcr__1b", "ttcr__2b"],
+        "dl_preml": bracket_expansion(["incl", "{sr,ttcr,dycr}{,__2e,__2mu,__emu}{,__1b,__2b}"]),
         "dl_ttcr": ["ttcr", "ttcr__1b", "ttcr__2b", "ttcr__2e", "ttcr__2mu", "ttcr__emu"],
         "dl_dycr": ["dycr", "dycr__1b", "dycr__2b", "dycr__2e", "dycr__2mu", "dycr__emu"],
         "dl_sr": ["sr", "sr__1b", "sr__2b", "sr__2e", "sr__2mu", "sr__emu"],

From 8c85933ce077e31b44ba52ebeaa99e8043ad076a Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Fri, 13 Dec 2024 09:14:33 +0100
Subject: [PATCH 16/28] lint

---
 hbw/ml/base.py         |  2 +-
 hbw/ml/data_loader.py  |  2 +-
 hbw/tasks/campaigns.py |  2 +-
 hbw/util.py            | 10 +++++-----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/hbw/ml/base.py b/hbw/ml/base.py
index 90dfe5a6..e03c3bd7 100644
--- a/hbw/ml/base.py
+++ b/hbw/ml/base.py
@@ -216,7 +216,7 @@ def setup(self) -> None:
                     "rebin_config": {
                         "processes": [proc],
                         "n_bins": 4,
-                    }
+                    },
                 },  # automatically rebin to 40 bins for plotting tasks
             )

diff --git a/hbw/ml/data_loader.py b/hbw/ml/data_loader.py
index 445ccaba..9b6a1457 100644
--- a/hbw/ml/data_loader.py
+++ b/hbw/ml/data_loader.py
@@ -27,7 +27,7 @@ def get_proc_mask(
     """
    Creates the mask selecting events belonging to the process *proc* and a list of all ids belonging to this process.
 
-    :param events: Event array 
+    :param events: Event array
    :param proc: Either string or process instance.
    :param config_inst: An instance of the Config, can be None if Process instance is given.
    :return: process mask and the corresponding process ids

diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py
index 8c9b9ca7..62c36e74 100644
--- a/hbw/tasks/campaigns.py
+++ b/hbw/tasks/campaigns.py
@@ -115,7 +115,7 @@ def dataset_summary(self):
         return dict(dataset_summary)
 
     @cached_property
-    def campaign_summary(self,):
+    def campaign_summary(self):
         campaign_summary = {
             campaign.name: {} for campaign in self.campaign_insts
         }

diff --git a/hbw/util.py b/hbw/util.py
index 93b00cb7..67082e94 100644
--- a/hbw/util.py
+++ b/hbw/util.py
@@ -390,16 +390,16 @@ def bracket_expansion(inputs: list):
 
     NOTE: similar implementation might be somewhere in columnflow.
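 
     For illustration (a minimal sketch, not part of the original patch), the call
 
         bracket_expansion(["lepton{0,1}_{pt,eta}"])
 
     yields ["lepton0_eta", "lepton0_pt", "lepton1_eta", "lepton1_pt"], i.e. a
     sorted, de-duplicated list.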
""" - pattern = re.compile(r'\{([^{}]+)\}') + pattern = re.compile(r"\{([^{}]+)\}") outp = set() for inp in inputs: - # Find all bracketed groups and extract options by splitting on ',' + # Find all bracketed groups and extract options by splitting on "," matches = pattern.findall(inp) - options = [match.split(',') for match in matches] + options = [match.split(",") for match in matches] - # Replace each bracketed group with a placeholder '{}' - template = pattern.sub('{}', inp) + # Replace each bracketed group with a placeholder "{}" + template = pattern.sub("{}", inp) # Generate all possible combinations and add to the output set combinations = itertools.product(*options) From d496971b00f26ef6f031c4126212048703cd5436 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 13 Dec 2024 09:27:37 +0100 Subject: [PATCH 17/28] lint again --- tests/test_util.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 85defa9c..d7a860c8 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -53,15 +53,15 @@ def test_nested_modification(self): "name": "Alice", "skills": { "python": "intermediate", - "sql": "beginner" - } + "sql": "beginner", + }, } dict2 = { "name": "Alice", "skills": { "python": "advanced", - "sql": "beginner" - } + "sql": "beginner", + }, } result = gather_dict_diff(dict1, dict2) expected_output = ( @@ -76,15 +76,15 @@ def test_nested_addition(self): dict1 = { "name": "Alice", "skills": { - "python": "intermediate" - } + "python": "intermediate", + }, } dict2 = { "name": "Alice", "skills": { "python": "intermediate", - "docker": "beginner" - } + "docker": "beginner", + }, } result = gather_dict_diff(dict1, dict2) expected_output = ( From 1b57fed9e682eb3f6164ef5f001c4e0d2322aaf7 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 13 Dec 2024 09:37:44 +0100 Subject: [PATCH 18/28] loop over produced_columns instead of produces --- hbw/config/defaults_and_groups.py | 4 ++-- hbw/config/variables.py | 10 +++++----- hbw/production/features.py | 6 ++++-- hbw/production/normalized_btag.py | 6 ++++-- hbw/production/resonant_features.py | 3 ++- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index d2d8897c..0443d24b 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -306,8 +306,8 @@ def set_config_defaults_and_groups(config_inst): "n_{jet,bjet,electron,muon,fatjet,hbbjet}", "lepton{0,1}_{pt,eta,phi}", "met_{pt,phi}", - "jet{0,1,2,3}_{pt,eta,phi,mass,btagPNetB}", - "bjet{0,1}_{pt,eta,phi,mass,btagPNetB}", + "jet{0,1,2,3}_{pt,eta,phi,mass,btagpnetb}", + "bjet{0,1}_{pt,eta,phi,mass,btagpnetb}", "ht", "lt", "mll", "ptll", ]), "dl_resolved": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht", "lt", "mll", "ptll"], diff --git a/hbw/config/variables.py b/hbw/config/variables.py index 60d5071e..7fa7b1f8 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -541,7 +541,7 @@ def add_variables(config: od.Config) -> None: ) if config.x.run == 2: config.add_variable( - name=f"jet{i}_btagDeepFlavB", + name=f"jet{i}_btagDeepFlavB".lower(), expression=f"Jet.btagDeepFlavB[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), @@ -549,7 +549,7 @@ def add_variables(config: od.Config) -> None: ) if config.x.run == 3: config.add_variable( - name=f"jet{i}_btagPNetB", + name=f"jet{i}_btagPNetB".lower(), expression=f"Jet.btagPNetB[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), @@ 
-590,7 +590,7 @@ def add_variables(config: od.Config) -> None: ) if config.x.run == 3: config.add_variable( - name=f"{obj}{i}_btagPNetB", + name=f"{obj}{i}_btagPNetB".lower(), expression=f"{obj}.btagPNetB[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), @@ -652,14 +652,14 @@ def add_variables(config: od.Config) -> None: x_title=r"FatJet %i $\tau_2$" % i, ) config.add_variable( - name=f"fatjet{i}_btagHbb", + name=f"fatjet{i}_btagHbb".lower(), expression=f"FatJet.btagHbb[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), x_title=r"FatJet %i btagHbb" % i, ) config.add_variable( - name=f"fatjet{i}_deepTagMD_HbbvsQCD", + name=f"fatjet{i}_deepTagMD_HbbvsQCD".lower(), expression=f"FatJet.deepTagMD_HbbvsQCD[:,{i}]", null_value=EMPTY_FLOAT, binning=(40, 0, 1), diff --git a/hbw/production/features.py b/hbw/production/features.py index 7a23858b..f41dd275 100644 --- a/hbw/production/features.py +++ b/hbw/production/features.py @@ -37,7 +37,8 @@ def jj_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "deltaR_jj", deltaR_jj) # fill none values - for col in self.produces: + for route in self.produced_columns: + col = route.string_column events = set_ak_column_f32(events, col, ak.fill_none(events[col], EMPTY_FLOAT)) return events @@ -62,7 +63,8 @@ def bb_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "m_bb_combined", m_bb_combined) # fill none values - for col in self.produces: + for route in self.produced_columns: + col = route.string_column events = set_ak_column_f32(events, col, ak.fill_none(events[col], EMPTY_FLOAT)) return events diff --git a/hbw/production/normalized_btag.py b/hbw/production/normalized_btag.py index 24743367..7c6bbad7 100644 --- a/hbw/production/normalized_btag.py +++ b/hbw/production/normalized_btag.py @@ -46,7 +46,8 @@ def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar raise NotImplementedError( f"Normalization mode {mode} not implemented (see hbw.tasks.corrections.GetBtagNormalizationSF)", ) - for weight_name in self[btag_weights].produces: + for weight_route in self[btag_weights].produced_columns: + weight_name = weight_route.string_column if not weight_name.startswith("btag_weight"): continue @@ -66,7 +67,8 @@ def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar @normalized_btag_weights.init def normalized_btag_weights_init(self: Producer) -> None: - for weight_name in self[btag_weights].produces: + for weight_route in self[btag_weights].produced_columns: + weight_name = weight_route.string_column if not weight_name.startswith("btag_weight"): continue for mode in self.modes: diff --git a/hbw/production/resonant_features.py b/hbw/production/resonant_features.py index 9fc5a169..d8aafe51 100644 --- a/hbw/production/resonant_features.py +++ b/hbw/production/resonant_features.py @@ -98,7 +98,8 @@ def resonant_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "m_Heavy_Higgs", events.Heavy_Higgs.mass) events = set_ak_column_f32(events, "eta_Heavy_Higgs", events.Heavy_Higgs.eta) events = set_ak_column_f32(events, "phi_Heavy_Higgs", events.Heavy_Higgs.phi) - for col in self.produces: + for route in self.produced_columns: + col = route.string_column events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), EMPTY_FLOAT)) # undo object padding From 6e7e68852c4cf9ef523344bac39b3f6b77df035e Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 13 Dec 
2024 09:53:00 +0100 Subject: [PATCH 19/28] remove features producer --- hbw/config/variables.py | 111 ------------------- hbw/production/features.py | 214 ------------------------------------- law.cfg | 2 +- 3 files changed, 1 insertion(+), 326 deletions(-) delete mode 100644 hbw/production/features.py diff --git a/hbw/config/variables.py b/hbw/config/variables.py index 7fa7b1f8..20fbe348 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -16,117 +16,6 @@ from hbw.config.styling import default_var_binning, default_var_unit -@call_once_on_config() -def add_feature_variables(config: od.Config) -> None: - """ - Adds variables to a *config* that are produced as part of the `features` producer. - """ - - # Event properties - config.add_variable( - name="features_n_jet", - expression=lambda events: ak.num(events.Jet.pt, axis=1), - binning=(12, -0.5, 11.5), - x_title="Number of jets", - aux={"inputs": {"Jet.pt"}}, - discrete_x=True, - ) - config.add_variable( - name="features_n_deepjet", - binning=(11, -0.5, 10.5), - x_title="Number of deepjets", - discrete_x=True, - ) - config.add_variable( - name="features_n_fatjet", - binning=(7, -0.5, 6.5), - x_title="Number of fatjets", - discrete_x=True, - ) - config.add_variable( - name="features_n_hbbjet", - binning=(4, -0.5, 3.5), - x_title="Number of hbbjets", - discrete_x=True, - ) - config.add_variable( - name="features_n_electron", - binning=(4, -0.5, 3.5), - x_title="Number of electrons", - discrete_x=True, - ) - config.add_variable( - name="features_n_muon", - binning=(4, -0.5, 3.5), - x_title="Number of muons", - discrete_x=True, - ) - config.add_variable( - name="features_n_bjet", - binning=(4, -0.5, 3.5), - x_title="Number of bjets", - discrete_x=True, - ) - config.add_variable( - name="features_ht", - binning=(40, 0, 1500), - x_title="HT", - ) - - # bb features - config.add_variable( - name="m_bb", - binning=(40, 0., 400.), - unit="GeV", - x_title=r"$m_{bb}$", - ) - config.add_variable( - name="m_bb_combined", - binning=(40, 0., 400.), - unit="GeV", - x_title=r"$m_{bb}$ combined", - ) - config.add_variable( - name="bb_pt", - binning=(40, 0., 350), - x_title=r"$p_T^{bb}$", - unit="GeV", - ) - config.add_variable( - name="deltaR_bb", - binning=(40, 0, 5), - x_title=r"$\Delta R(b,b)$", - ) - # jj features - config.add_variable( - name="m_jj", - binning=(40, 0., 400.), - unit="GeV", - x_title=r"$m_{jj}$", - ) - config.add_variable( - name="jj_pt", - binning=(40, 0., 350), - x_title=r"$p_T^{jj}$", - unit="GeV", - ) - config.add_variable( - name="deltaR_jj", - binning=(40, 0, 5), - x_title=r"$\Delta R(j_{1},j_{2})$", - ) - - # FatJet features - for i in range(2): - config.add_variable( - name=f"fatjet{i}_tau21", - expression=f"FatJet.tau21[:,{i}]", - null_value=EMPTY_FLOAT, - binning=(40, 0, 1), - x_title=r"FatJet %i $\tau_{21}$" % i, - ) - - @call_once_on_config() def add_neutrino_variables(config: od.Config) -> None: """ diff --git a/hbw/production/features.py b/hbw/production/features.py deleted file mode 100644 index f41dd275..00000000 --- a/hbw/production/features.py +++ /dev/null @@ -1,214 +0,0 @@ -# coding: utf-8 - -""" -Column production methods related to higher-level features. 
-""" - -import functools - -from columnflow.production import Producer, producer -from columnflow.util import maybe_import -from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT - -from hbw.production.prepare_objects import prepare_objects -from hbw.config.variables import add_feature_variables -from hbw.config.dl.variables import add_dl_variables - -np = maybe_import("numpy") -ak = maybe_import("awkward") -coffea = maybe_import("coffea") -maybe_import("coffea.nanoevents.methods.nanoaod") -# from coffea.nanoevents.methods.nanoaod import behavior - -# helper -set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) - - -@producer( - uses={"Jet.{pt,eta,phi,mass}"}, - produces={"m_jj", "jj_pt", "deltaR_jj"}, -) -def jj_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - # create jj features - jj = (events.Jet[:, 0] + events.Jet[:, 1]) - deltaR_jj = events.Jet[:, 0].delta_r(events.Jet[:, 1]) - events = set_ak_column_f32(events, "m_jj", jj.mass) - events = set_ak_column_f32(events, "jj_pt", jj.pt) - events = set_ak_column_f32(events, "deltaR_jj", deltaR_jj) - - # fill none values - for route in self.produced_columns: - col = route.string_column - events = set_ak_column_f32(events, col, ak.fill_none(events[col], EMPTY_FLOAT)) - return events - - -@producer( - uses={ - "HbbJet.msoftdrop", "Jet.{pt,eta,phi,mass}", - }, - produces={"m_bb", "bb_pt", "deltaR_bb", "m_bb_combined"}, -) -def bb_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - # create bb features - bb = (events.Bjet[:, 0] + events.Bjet[:, 1]) - events = set_ak_column_f32(events, "m_bb", bb.mass) - events = set_ak_column_f32(events, "bb_pt", bb.pt) - - deltaR_bb = events.Bjet[:, 0].delta_r(events.Bjet[:, 1]) - events = set_ak_column_f32(events, "deltaR_bb", deltaR_bb) - - # combination of resolved and boosted bb mass - m_bb_combined = ak.where(ak.num(events.HbbJet) > 0, events.HbbJet[:, 0].msoftdrop, bb.mass) - events = set_ak_column_f32(events, "m_bb_combined", m_bb_combined) - - # fill none values - for route in self.produced_columns: - col = route.string_column - events = set_ak_column_f32(events, col, ak.fill_none(events[col], EMPTY_FLOAT)) - - return events - - -@producer( - uses={ - prepare_objects, - bb_features, jj_features, - "Electron.pt", "Electron.eta", "Muon.pt", "Muon.eta", - "Muon.charge", "Electron.charge", - "Jet.pt", "Jet.eta", "Jet.btagDeepFlavB", "Jet.btagPNetB", - "Bjet.pt", - "HbbJet.pt", - "FatJet.pt", "FatJet.tau1", "FatJet.tau2", - }, - produces={ - bb_features, jj_features, - "ht", "n_jet", "n_electron", "n_muon", "n_deepjet", "n_fatjet", "n_hbbjet", - "FatJet.tau21", "n_bjet", - }, -) -def features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - - # add behavior and define new collections (e.g. 
Lepton) - events = self[prepare_objects](events, **kwargs) - - # object padding - events = set_ak_column(events, "Jet", ak.pad_none(events.Jet, 2)) - events = set_ak_column(events, "Bjet", ak.pad_none(events.Bjet, 2)) - events = set_ak_column(events, "FatJet", ak.pad_none(events.FatJet, 1)) - events = set_ak_column(events, "HbbJet", ak.pad_none(events.HbbJet, 1)) - - # ht and number of objects (safe for None entries) - events = set_ak_column_f32(events, "ht", ak.sum(events.Jet.pt, axis=1)) - events = set_ak_column(events, "n_jet", ak.sum(events.Jet.pt > 0, axis=1)) - events = set_ak_column(events, "n_bjet", ak.sum(events.Bjet.pt > 0, axis=1)) - events = set_ak_column(events, "n_electron", ak.sum(events.Electron.pt > 0, axis=1)) - events = set_ak_column(events, "n_muon", ak.sum(events.Muon.pt > 0, axis=1)) - wp_med_deepjet = self.config_inst.x.btag_working_points.deepjet.medium - events = set_ak_column(events, "n_deepjet", ak.sum(events.Jet.btagDeepFlavB > wp_med_deepjet, axis=1)) - wp_med_particlenet = self.config_inst.x.btag_working_points.particlenet.medium - events = set_ak_column(events, "n_particlenet", ak.sum(events.Jet.btagPNetB > wp_med_particlenet, axis=1)) - events = set_ak_column(events, "n_fatjet", ak.sum(events.FatJet.pt > 0, axis=1)) - events = set_ak_column(events, "n_hbbjet", ak.sum(events.HbbJet.pt > 0, axis=1)) - - # Subjettiness - events = set_ak_column_f32(events, "FatJet.tau21", events.FatJet.tau2 / events.FatJet.tau1) - - # bb and jj features - events = self[bb_features](events, **kwargs) - events = self[jj_features](events, **kwargs) - - # undo object padding (remove None entries) - for obj in ["Jet", "Bjet", "FatJet"]: - events = set_ak_column(events, obj, events[obj][~ak.is_none(events[obj], axis=1)]) - - return events - - -@features.init -def features_init(self: Producer) -> None: - # add variable instances to config - add_feature_variables(self.config_inst) - - -@producer( - uses={ - "{Electron,Muon,Bjet}.{pt,eta,phi,mass}", "MET.{pt,phi}", - features, - "Electron.charge", "Muon.charge", - }, - produces={ - features, - "deltaR_ll", "ll_pt", "m_bb", "deltaR_bb", "bb_pt", - "MT", "min_dr_lljj", "delta_Phi", "m_lljjMET", - "m_ll_check", "E_miss", "charge", "wp_score", - }, -) -def dl_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - - # Inherit common features and prepares Object Lepton. Bjet, etc. 
- events = self[features](events, **kwargs) - - # create ll object and ll variables - ll = (events.Lepton[:, 0] + events.Lepton[:, 1]) - deltaR_ll = events.Lepton[:, 0].delta_r(events.Lepton[:, 1]) - events = set_ak_column_f32(events, "ll_pt", ll.pt) - events = set_ak_column_f32(events, "m_ll_check", ll.mass) - events = set_ak_column_f32(events, "deltaR_ll", deltaR_ll) - - # minimum deltaR between lep and jet - lljj_pairs = ak.cartesian([events.Lepton, events.Bjet], axis=1) - lep, jet = ak.unzip(lljj_pairs) - min_dr_lljj = (ak.min(lep.delta_r(jet), axis=-1)) - events = set_ak_column_f32(events, "min_dr_lljj", min_dr_lljj) - - # Transverse mass - MT = (2 * events.MET.pt * ll.pt * (1 - np.cos(ll.delta_phi(events.MET)))) ** 0.5 - events = set_ak_column_f32(events, "MT", MT) - - # delta Phi between ll and bb object - bb = (events.Bjet[:, 0] + events.Bjet[:, 1]) - events = set_ak_column_f32(events, "delta_Phi", abs(ll.delta_phi(bb))) - - # invariant mass of all decay products - m_lljjMET = (events.Bjet[:, 0] + events.Bjet[:, 1] + events.Lepton[:, 0] + events.Lepton[:, 1] + events.MET[:]).mass - events = set_ak_column(events, "m_lljjMET", m_lljjMET) - - # Lepton charge - events = set_ak_column(events, "charge", (events.Lepton.charge)) - - # fill none values for dl variables - dl_variable_list = [ - "m_bb", "bb_pt", "deltaR_bb", "ll_pt", "m_ll_check", "deltaR_ll", "min_dr_lljj", - "charge", "MT", "delta_Phi", "E_miss", "m_lljjMET", - ] - for var in dl_variable_list: - events = set_ak_column_f32(events, var, ak.fill_none(events[var], EMPTY_FLOAT)) - - return events - - -@dl_features.init -def dl_features_init(self: Producer) -> None: - # add variable instances to config - add_dl_variables(self.config_inst) - - -from hbw.production.resonant_features import resonant_features - - -@producer( - uses={ - features, resonant_features, - }, - produces={ - features, resonant_features, - }, -) -def sl_res_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - - # Inherit common features and prepares Object Lepton. Bjet, etc. 
- events = self[features](events, **kwargs) - events = self[resonant_features](events, **kwargs) - - return events diff --git a/law.cfg b/law.cfg index ef9ce8c0..bd75a910 100644 --- a/law.cfg +++ b/law.cfg @@ -32,7 +32,7 @@ default_version: prod3 default_common_version: common3 -production_modules: hbw.production.{weights,features,ml_inputs,categories,gen_hbw_decay,neutrino,synchronization}, hbw.ml.stats +production_modules: hbw.production.{weights,ml_inputs,categories,gen_hbw_decay,neutrino,synchronization}, hbw.ml.stats calibration_modules: columnflow.calibration.jets, hbw.calibration.default selection_modules: hbw.selection.{jet,common,sl_remastered,dl_remastered} categorization_modules: hbw.categorization.categories From 2d45359206b8016fc0a167b90cca2f63656328e6 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 17 Dec 2024 12:52:03 +0100 Subject: [PATCH 20/28] fix dataset-dependent shift resolving --- hbw/weight/default.py | 49 ++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/hbw/weight/default.py b/hbw/weight/default.py index 2b472e03..2179df8f 100644 --- a/hbw/weight/default.py +++ b/hbw/weight/default.py @@ -6,7 +6,7 @@ import law -from columnflow.util import maybe_import +from columnflow.util import maybe_import, InsertableDict from columnflow.weight import WeightProducer, weight_producer from columnflow.config_util import get_shifts_from_sources from columnflow.columnar_util import Route @@ -59,46 +59,65 @@ def base(self: WeightProducer, events: ak.Array, **kwargs) -> ak.Array: # build the full event weight weight = ak.Array(np.ones(len(events), dtype=np.float32)) - for column in self.weight_columns.keys(): + for column in self.local_weight_columns.keys(): weight = weight * Route(column).apply(events) return events, weight +@base.setup +def base_setup( + self: WeightProducer, + reqs: dict, + inputs: dict, + reader_targets: InsertableDict, +) -> None: + logger.info( + f"WeightProducer '{self.cls_name}' (dataset {self.dataset_inst}) uses weight columns: \n" + f"{', '.join(self.weight_columns.keys())}", + ) + + @base.init def base_init(self: WeightProducer) -> None: # NOTE: this might be called multiple times, might be quite inefficient - if not getattr(self, "config_inst", None) or not getattr(self, "dataset_inst", None): + # if not getattr(self, "config_inst", None) or not getattr(self, "dataset_inst", None): + # return + + if not getattr(self, "config_inst"): return - if self.dataset_inst.is_data: + dataset_inst = getattr(self, "dataset_inst", None) + + if dataset_inst and dataset_inst.is_data: return year = self.config_inst.campaign.x.year if not self.weight_columns: raise Exception("weight_columns not set") + self.local_weight_columns = self.weight_columns.copy() - if self.dataset_inst.has_tag("skip_scale"): + if dataset_inst and dataset_inst.has_tag("skip_scale"): # remove dependency towards mur/muf weights for column in [ "normalized_mur_weight", "normalized_muf_weight", "normalized_murmuf_envelope_weight", "mur_weight", "muf_weight", "murmuf_envelope_weight", ]: - self.weight_columns.pop(column, None) + self.local_weight_columns.pop(column, None) - if self.dataset_inst.has_tag("skip_pdf"): + if dataset_inst and dataset_inst.has_tag("skip_pdf"): # remove dependency towards pdf weights for column in ["pdf_weight", "normalized_pdf_weight"]: - self.weight_columns.pop(column, None) + self.local_weight_columns.pop(column, None) - if not self.dataset_inst.has_tag("is_ttbar"): + if dataset_inst and not 
dataset_inst.has_tag("is_ttbar"): # remove dependency towards top pt weights - self.weight_columns.pop("top_pt_weight", None) + self.local_weight_columns.pop("top_pt_weight", None) - if not self.dataset_inst.has_tag("is_v_jets"): + if dataset_inst and not dataset_inst.has_tag("is_v_jets"): # remove dependency towards vjets weights - self.weight_columns.pop("vjets_weight", None) + self.local_weight_columns.pop("vjets_weight", None) self.shifts = set() @@ -106,14 +125,14 @@ def base_init(self: WeightProducer) -> None: # TODO: we should do this somewhere centrally btag_sf_jec_sources = ( (set(self.config_inst.x.btag_sf_jec_sources) | {"Total"}) & - set(self.config_inst.x.jec["uncertainty_sources"]) + set(self.config_inst.x.jec.Jet["uncertainty_sources"]) ) self.shifts |= set(get_shifts_from_sources( self.config_inst, *[f"jec_{jec_source}" for jec_source in btag_sf_jec_sources], )) - for weight_column, shift_sources in self.weight_columns.items(): + for weight_column, shift_sources in self.local_weight_columns.items(): shift_sources = law.util.make_list(shift_sources) shift_sources = [s.format(year=year) for s in shift_sources] shifts = get_shifts_from_sources(self.config_inst, *shift_sources) @@ -129,7 +148,7 @@ def base_init(self: WeightProducer) -> None: self.shifts |= set(shifts) # store column names referring to weights to multiply - self.uses |= self.weight_columns.keys() + self.uses |= self.local_weight_columns.keys() btag_uncs = [ From 87bb2127f46b40c6e8652cc25295a9f3cb53771f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 17 Dec 2024 12:53:19 +0100 Subject: [PATCH 21/28] fix tests --- tests/__init__.py | 3 ++- tests/run_test | 8 ++++---- tests/test_util.py | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index e008438a..71049fac 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -16,4 +16,5 @@ import hbw # noqa # import all tests -# ... +from .test_util import * +from .test_workflow import * diff --git a/tests/run_test b/tests/run_test index ec92cc0f..b354f8a1 100755 --- a/tests/run_test +++ b/tests/run_test @@ -21,14 +21,14 @@ action() { if [ -z "${sandbox}" ]; then echo "testing ${mod} ..." ( - cd "${cf_dir}" && \ - python -m unittest "tests.${mod}" + cd "${this_dir}" && \ + python -m unittest "${mod}" ) else echo "testing ${mod} ..." 
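        # same module as above, but executed through cf_sandbox inside the requested sandbox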
( - cd "${cf_dir}" && \ - cf_sandbox "${sandbox}" "python -m unittest tests.${mod}" + cd "${this_dir}" && \ + cf_sandbox "${sandbox}" "python -m unittest ${mod}" ) fi } diff --git a/tests/test_util.py b/tests/test_util.py index d7a860c8..3bd06276 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -114,15 +114,15 @@ def test_complex_diff(self): } result = gather_dict_diff(dict1, dict2) expected_output = ( - "🔄 Modified: age:\n" - " - Old: 25\n" - " - New: 26\n" "🔄 Modified: skills:\n" + " 🔹 Added: docker: beginner\n" " 🔄 Modified: python:\n" " - Old: intermediate\n" " - New: advanced\n" - " 🔹 Added: docker: beginner\n" "🔹 Added: hobby: cycling" + "🔄 Modified: age:\n" + " - Old: 25\n" + " - New: 26\n" ) self.assertEqual(result, expected_output) From c113c5af666b9a6c2cf51956d3753e0c93fa3dae Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 17 Dec 2024 15:27:39 +0100 Subject: [PATCH 22/28] 'fix' tests --- tests/test_util.py | 114 +-------------------------------------------- 1 file changed, 1 insertion(+), 113 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 3bd06276..1f3289de 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -8,7 +8,7 @@ from columnflow.util import maybe_import -from hbw.util import build_param_product, round_sig, dict_diff, four_vec, call_once_on_config, gather_dict_diff +from hbw.util import build_param_product, round_sig, dict_diff, four_vec, call_once_on_config import order as od @@ -16,119 +16,7 @@ ak = maybe_import("awkward") -class TestDictDiff(unittest.TestCase): - def test_no_difference(self): - dict1 = {"name": "Alice", "age": 25} - dict2 = {"name": "Alice", "age": 25} - result = gather_dict_diff(dict1, dict2) - self.assertEqual(result, "✅ No differences found.") - - def test_simple_modification(self): - dict1 = {"name": "Alice", "age": 25} - dict2 = {"name": "Alice", "age": 26} - result = gather_dict_diff(dict1, dict2) - expected_output = ( - "🔄 Modified: age:\n" - " - Old: 25\n" - " - New: 26" - ) - self.assertEqual(result, expected_output) - - def test_addition(self): - dict1 = {"name": "Alice"} - dict2 = {"name": "Alice", "hobby": "cycling"} - result = gather_dict_diff(dict1, dict2) - expected_output = "🔹 Added: hobby: cycling" - self.assertEqual(result, expected_output) - - def test_removal(self): - dict1 = {"name": "Alice", "hobby": "cycling"} - dict2 = {"name": "Alice"} - result = gather_dict_diff(dict1, dict2) - expected_output = "🔻 Removed: hobby: cycling" - self.assertEqual(result, expected_output) - - def test_nested_modification(self): - dict1 = { - "name": "Alice", - "skills": { - "python": "intermediate", - "sql": "beginner", - }, - } - dict2 = { - "name": "Alice", - "skills": { - "python": "advanced", - "sql": "beginner", - }, - } - result = gather_dict_diff(dict1, dict2) - expected_output = ( - "🔄 Modified: skills:\n" - " 🔄 Modified: python:\n" - " - Old: intermediate\n" - " - New: advanced" - ) - self.assertEqual(result, expected_output) - - def test_nested_addition(self): - dict1 = { - "name": "Alice", - "skills": { - "python": "intermediate", - }, - } - dict2 = { - "name": "Alice", - "skills": { - "python": "intermediate", - "docker": "beginner", - }, - } - result = gather_dict_diff(dict1, dict2) - expected_output = ( - "🔄 Modified: skills:\n" - " 🔹 Added: docker: beginner" - ) - self.assertEqual(result, expected_output) - - def test_complex_diff(self): - dict1 = { - "name": "Alice", - "age": 25, - "skills": { - "python": "intermediate", - "sql": "beginner", - }, - } - dict2 = { - "name": "Alice", - 
"age": 26, - "skills": { - "python": "advanced", - "sql": "beginner", - "docker": "beginner", - }, - "hobby": "cycling", - } - result = gather_dict_diff(dict1, dict2) - expected_output = ( - "🔄 Modified: skills:\n" - " 🔹 Added: docker: beginner\n" - " 🔄 Modified: python:\n" - " - Old: intermediate\n" - " - New: advanced\n" - "🔹 Added: hobby: cycling" - "🔄 Modified: age:\n" - " - Old: 25\n" - " - New: 26\n" - ) - self.assertEqual(result, expected_output) - - class HbwUtilTest( - TestDictDiff, unittest.TestCase, ): From b45aa6597b6ba9cd43816e3044e0b512054def51 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 18 Dec 2024 13:37:40 +0100 Subject: [PATCH 23/28] update cf + required changes --- hbw/analysis/create_analysis.py | 2 ++ hbw/config/config_run2.py | 10 +++++----- hbw/config/defaults_and_groups.py | 4 ++-- modules/columnflow | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 4c7d19fc..5879523f 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -75,6 +75,8 @@ def create_hbw_analysis( analysis_inst.x.default_weight_producer = "default" analysis_inst.x.ml_inputs_producer = ml_inputs_producer(analysis_inst) analysis_inst.x.default_ml_model = default_ml_model + analysis_inst.x.default_variables = ["jet0_pt", "mll", "n_jet", "ptll", "lepton0_pt", "lepton1_pt"] + analysis_inst.x.default_categories = ["incl", "sr", "ttcr", "dycr"] # # define configs diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index 854705b5..4c07c2a5 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -196,7 +196,7 @@ def if_era( jerc_campaign = f"Summer{year2}{jerc_postfix}_22Sep2023" jet_type = "AK4PFPuppi" - cfg.x.jec = DotDict.wrap({ + cfg.x.jec = DotDict.wrap({"Jet": { "campaign": jerc_campaign, "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], "jet_type": jet_type, @@ -260,15 +260,15 @@ def if_era( "CorrelationGroupFlavor", "CorrelationGroupUncorrelated", ], - }) + }}) # JER # https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=107 - cfg.x.jer = DotDict.wrap({ + cfg.x.jer = DotDict.wrap({"Jet": { "campaign": jerc_campaign, "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], "jet_type": jet_type, - }) + }}) # JEC uncertainty sources propagated to btag scale factors # (names derived from contents in BTV correctionlib file) @@ -524,7 +524,7 @@ def if_era( with open(os.path.join(thisdir, "jec_sources.yaml"), "r") as f: all_jec_sources = yaml.load(f, yaml.Loader)["names"] - for jec_source in cfg.x.jec["uncertainty_sources"]: + for jec_source in cfg.x.jec.Jet["uncertainty_sources"]: idx = all_jec_sources.index(jec_source) cfg.add_shift( name=f"jec_{jec_source}_up", diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index 0443d24b..4bb544b3 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -119,8 +119,8 @@ def set_config_defaults_and_groups(config_inst): # # config_inst.x.default_weight_producer = "btag_not_normalized" # config_inst.x.default_ml_model = default_ml_model config_inst.x.default_inference_model = "default" if year == 2017 else "sl_22" - config_inst.x.default_categories = ["incl"] - config_inst.x.default_variables = ["jet1_pt"] + # config_inst.x.default_categories = ["incl"] + # config_inst.x.default_variables = ["jet1_pt"] # # Groups diff --git a/modules/columnflow b/modules/columnflow index 26426673..ed1be2f7 160000 --- 
a/modules/columnflow
+++ b/modules/columnflow
@@ -1 +1 @@
-Subproject commit 264266731d11c6b652a7bcf1bb56ad1ebffea595
+Subproject commit ed1be2f7473d16875e744496445085d0bfda1a3a

From ea39ece4eeab927cbf82556d5c47c8e487f2a661 Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Thu, 19 Dec 2024 09:16:37 +0100
Subject: [PATCH 24/28] add uhh campaign in 22preEE

---
 hbw/tasks/campaigns.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py
index 62c36e74..71cc3d17 100644
--- a/hbw/tasks/campaigns.py
+++ b/hbw/tasks/campaigns.py
@@ -25,6 +25,7 @@
     "c22pre": {
         "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12",
         "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13",
+        "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12",
     },
     "c22post": {
         "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12",
@@ -163,7 +164,7 @@ def run(self):
             else:
                 logger.warning(
                     "Run the following command to recreate the backup dataset summary:\n"
-                    f"law run {self.task_family} --recreate_backup_summary --config {self.config} --remove-output 0,a,y",  # noqa
+                    f"law run {self.task_family} --recreate-backup-summary --config {self.config} --remove-output 0,a,y",  # noqa
                 )
         else:
             logger.warning("No backup dataset summary found, creating one now")

From e5c0635ebc5c9c9d246b42c4a47c072ec4854791 Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Thu, 19 Dec 2024 10:36:02 +0100
Subject: [PATCH 25/28] update cf (needs reprocessing)

---
 hbw/tasks/plotting.py |  8 ++++----
 law.cfg               | 40 ++++++++++++++++++++++++++++----------
 modules/columnflow    |  2 +-
 3 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/hbw/tasks/plotting.py b/hbw/tasks/plotting.py
index afa58bbb..a4d88e06 100644
--- a/hbw/tasks/plotting.py
+++ b/hbw/tasks/plotting.py
@@ -242,14 +242,14 @@ def run(self):
                 if p.id in h.axes["process"]
             ],
             "category": [
-                hist.loc(c.id)
+                hist.loc(c.name)
                 for c in leaf_category_insts
-                if c.id in h.axes["category"]
+                if c.name in h.axes["category"]
             ],
             "shift": [
-                hist.loc(s.id)
+                hist.loc(s.name)
                 for s in plot_shifts
-                if s.id in h.axes["shift"]
+                if s.name in h.axes["shift"]
             ],
         }]

diff --git a/law.cfg b/law.cfg
index bd75a910..edebd04e 100644
--- a/law.cfg
+++ b/law.cfg
@@ -99,19 +99,39 @@ lfn_sources: local_desy_dcache, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlc
 # cf.MLTraining: wlcg
 # cf.MLEvaluation: wlcg
 
+
+[versions]
+
+# NOTE: in hbw, we also define default versions via the analysis section
+# default versions of specific tasks to pin
+# the key can consist of multiple underscore-separated parts, each of which can be a pattern or regex
+# these parts are used for the lookup from within tasks and can contain (e.g.) the analysis name,
+# the config name, the task family, the dataset name, or the shift name
+# (see AnalysisTask.get_config_lookup_keys() - and subclasses - for the exact order)
+# note:
+# this lookup is skipped if the lookup based on the config instance's auxiliary data succeeded
+# example:
+; c22post__cf.CalibrateEvents__nomin*: common3
+; cf.SelectEvents: prod3
+
+[resources]
+
+# default resources of remote workflows
+# keys can have the same format as described above in [versions] to pinpoint specific tasks
+# values should be comma-separated strings in the form "RESOURCE=VALUE", where RESOURCE should refer
+# to a valid task parameter (e.g. max_runtime, htcondor_memory, etc.)
so that VALUE can be parsed +# by the respective parameter instance at runtime +# same as for [versions], the order of options is important as it defines the resolution order +# example: +; c22post__cf.CalibrateEvents__nomin*: htcondor_memory=5GB +; cf.MLTraining: htcondor_memory=10GB, htcondor_gpus=1 + + +[luigi_cf.DummyTask] # To set defaults on a per-task basis # NOTE: this does override defaults defined in the config, but it does not overwrite parameters # when the parameter has already been set e.g. by another task requiring this task - -# TODO: to share some outputs over multiple analyses -# [luigi_cf.GetDatasetLFNs] - -# analysis: hbw.analysis.hbw_merged.hbw_merged - - -# [luigi_cf.CalibrateEvents] - -# analysis: hbw.analysis.hbw_merged.hbw_merged +dummy_param: dummy_value [luigi_cf.MergeReductionStats] diff --git a/modules/columnflow b/modules/columnflow index ed1be2f7..312bd050 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit ed1be2f7473d16875e744496445085d0bfda1a3a +Subproject commit 312bd05015de0f6edfea656353cd60ff02d8c608 From 356d18dd79e5b7b2f5b792c3912244fee88050dc Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 19 Dec 2024 11:18:23 +0100 Subject: [PATCH 26/28] remove hist_util duplicate --- hbw/hist_util.py | 99 ------------------------------------------ hbw/selection/hists.py | 8 ++-- 2 files changed, 4 insertions(+), 103 deletions(-) delete mode 100644 hbw/hist_util.py diff --git a/hbw/hist_util.py b/hbw/hist_util.py deleted file mode 100644 index 819c45c2..00000000 --- a/hbw/hist_util.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding: utf-8 - -""" -Task to produce and merge histograms. -""" - -from __future__ import annotations - -import law -import order as od - -from columnflow.util import maybe_import - -hist = maybe_import("hist") -np = maybe_import("numpy") -ak = maybe_import("awkward") - -logger = law.logger.get_logger(__name__) - - -def add_axis(histogram: hist.Hist, variable_inst: od.Variable) -> hist.Hist: - """ - Add an axis to a histogram based on a variable instance. The axis_type is chosen - based on the variable instance's "axis_type" auxiliary. - - :param histogram: The histogram to add the axis to. - :param variable_inst: The variable instance to use for the axis. - :return: The histogram with the added axis. 
- """ - default_kwargs = { - "name": variable_inst.name, - "label": variable_inst.get_full_x_title(), - } - - axis_kwargs = law.util.merge_dicts( - default_kwargs, - variable_inst.x("axis_kwargs", {}), - deep=True, - ) - - default_axis_type = "integer" if variable_inst.discrete_x else "variable" - axis_type = variable_inst.x("axis_type", default_axis_type).lower() - - if axis_type == "variable" or axis_type == "var": - return histogram.Var( - variable_inst.bin_edges, - **axis_kwargs, - ) - elif axis_type == "integer" or axis_type == "int": - return histogram.Integer( - int(variable_inst.bin_edges[0]), - int(variable_inst.bin_edges[-1]), - **axis_kwargs, - ) - elif axis_type == "boolean" or axis_type == "bool": - return histogram.Boolean( - **axis_kwargs, - ) - elif axis_type == "intcategory" or axis_type == "intcat": - binning = [int(b) for b in variable_inst.binning] if isinstance(variable_inst.binning, list) else [] - return histogram.IntCat( - binning, - growth=True, - **axis_kwargs, - ) - elif axis_type == "strcategory" or axis_type == "strcat": - return histogram.StrCat( - [], - growth=True, - **axis_kwargs, - ) - elif axis_type == "regular" or axis_type == "reg": - return histogram.Regular( - variable_inst.nbins, - variable_inst.bin_edges[0], - variable_inst.bin_edges[-1], - **axis_kwargs, - ) - - -def create_columnflow_hist( - *variable_insts, - add_default_axes: bool = False, -) -> hist.Hist: - histogram = hist.Hist.new - - # default axes - if add_default_axes: - histogram = histogram.IntCat([], name="category", growth=True) - histogram = histogram.IntCat([], name="process", growth=True) - histogram = histogram.IntCat([], name="shift", growth=True) - - # requested axes - for variable_inst in variable_insts: - histogram = add_axis(histogram, variable_inst) - - histogram = histogram.Weight() - - return histogram diff --git a/hbw/selection/hists.py b/hbw/selection/hists.py index bfd0698c..4bed747d 100644 --- a/hbw/selection/hists.py +++ b/hbw/selection/hists.py @@ -14,7 +14,7 @@ from columnflow.util import maybe_import from hbw.util import has_tag, IF_MC -from hbw.hist_util import create_columnflow_hist +from columnflow.hist_util import create_hist_from_variables np = maybe_import("numpy") ak = maybe_import("awkward") @@ -85,10 +85,10 @@ def hbw_selection_hists( if getattr(self, "first_chunk", True): for key, weight in weight_map.items(): if "btag_weight" not in key: - hists[key] = create_columnflow_hist(self.steps_variable) - hists[f"{key}_per_process"] = create_columnflow_hist(self.steps_variable, self.process_variable) + hists[key] = create_hist_from_variables(self.steps_variable) + hists[f"{key}_per_process"] = create_hist_from_variables(self.steps_variable, self.process_variable) if key == "sum_mc_weight" or "btag_weight" in key: - hists[f"{key}_per_process_ht_njet_nhf"] = create_columnflow_hist( + hists[f"{key}_per_process_ht_njet_nhf"] = create_hist_from_variables( self.steps_variable, self.process_variable, self.ht_variable, From 2f21fdb2556b3e9906411bae996e0b753ca35032 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 19 Dec 2024 15:52:15 +0100 Subject: [PATCH 27/28] fix derived muon weight producers --- hbw/production/weights.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 07301324..4fa4b49b 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -18,7 +18,7 @@ stitched_normalization_weights_brs_from_processes, ) from columnflow.production.cms.electron import 
electron_weights
-from columnflow.production.cms.muon import muon_weights
+from columnflow.production.cms.muon import muon_weights, MuonSFConfig
 from columnflow.production.cms.btag import btag_weights
 from columnflow.production.cms.scale import murmuf_weights, murmuf_envelope_weights
 from columnflow.production.cms.pdf import pdf_weights
@@ -123,15 +123,15 @@ def event_weights_to_normalize_init(self) -> None:
 
 muon_id_weights = muon_weights.derive("muon_id_weights", cls_dict={
     "weight_name": "muon_id_weight",
-    "get_muon_config": (lambda self: self.config_inst.x.muon_iso_sf_names),
+    "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_iso_sf_names)),
 })
 muon_iso_weights = muon_weights.derive("muon_iso_weights", cls_dict={
     "weight_name": "muon_iso_weight",
-    "get_muon_config": (lambda self: self.config_inst.x.muon_id_sf_names),
+    "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_id_sf_names)),
 })
 muon_trigger_weights = muon_weights.derive("muon_trigger_weights", cls_dict={
     "weight_name": "muon_trigger_weight",
-    "get_muon_config": (lambda self: self.config_inst.x.muon_trigger_sf_names),
+    "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_trigger_sf_names)),
 })

From cc3b6d66e47ffc28af5003b163b8b4b340230912 Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Fri, 20 Dec 2024 11:30:09 +0100
Subject: [PATCH 28/28] add dummy shift for MultiConfig tests

---
 hbw/config/config_run2.py | 12 +++++++++++-
 hbw/weight/default.py     | 13 ++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py
index 4c07c2a5..7fac5476 100644
--- a/hbw/config/config_run2.py
+++ b/hbw/config/config_run2.py
@@ -521,6 +521,16 @@ def if_era(
         },
     )
 
+    cfg.add_shift(name=f"dummy_{cfg.x.cpn_tag}_up", id=209, type="shape")
+    cfg.add_shift(name=f"dummy_{cfg.x.cpn_tag}_down", id=210, type="shape")
+    add_shift_aliases(
+        cfg,
+        f"dummy_{cfg.x.cpn_tag}",
+        {
+            "dummy_weight": f"dummy_{cfg.x.cpn_tag}_weight_" + "{direction}",
+        },
+    )
+
     with open(os.path.join(thisdir, "jec_sources.yaml"), "r") as f:
         all_jec_sources = yaml.load(f, yaml.Loader)["names"]

diff --git a/hbw/weight/default.py b/hbw/weight/default.py
index 2179df8f..ef5db3ca 100644
--- a/hbw/weight/default.py
+++ b/hbw/weight/default.py
@@ -62,6 +62,12 @@ def base(self: WeightProducer, events: ak.Array, **kwargs) -> ak.Array:
     for column in self.local_weight_columns.keys():
         weight = weight * Route(column).apply(events)
 
+    # implement dummy shift by varying the weight by a factor of 2
+    if "dummy" in self.local_shift_inst.name:
+        logger.warning("Applying dummy weight shift (should never be used for real analysis)")
+        variation = self.local_shift_inst.name.split("_")[-1]
+        weight = weight * {"up": 2.0, "down": 0.5}[variation]
+
     return events, weight
 
 
@@ -93,6 +99,7 @@ def base_init(self: WeightProducer) -> None:
         return
 
     year = self.config_inst.campaign.x.year
+    cpn_tag = self.config_inst.x.cpn_tag
 
     if not self.weight_columns:
         raise Exception("weight_columns not set")
@@ -134,7 +141,7 @@ def base_init(self: WeightProducer) -> None:
     for weight_column, shift_sources in self.local_weight_columns.items():
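        # for illustration: entries may be templated, e.g. "dummy_{cpn_tag}" is resolved
        # below to the dummy shift source registered for the campaign in config_run2.py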
shift_sources = law.util.make_list(shift_sources) - shift_sources = [s.format(year=year) for s in shift_sources] + shift_sources = [s.format(year=year, cpn_tag=cpn_tag) for s in shift_sources] shifts = get_shifts_from_sources(self.config_inst, *shift_sources) for shift in shifts: if weight_column not in shift.x("column_aliases").keys(): @@ -147,6 +154,9 @@ def base_init(self: WeightProducer) -> None: # declare shifts that the produced event weight depends on self.shifts |= set(shifts) + # remove dummy column from weight columns and uses + self.local_weight_columns.pop("dummy_weight") + # store column names referring to weights to multiply self.uses |= self.local_weight_columns.keys() @@ -158,6 +168,7 @@ def base_init(self: WeightProducer) -> None: default_correction_weights = { + "dummy_weight": ["dummy_{cpn_tag}"], "normalized_pu_weight": ["minbias_xs"], "muon_id_weight": ["mu_id_sf"], "muon_iso_weight": ["mu_iso_sf"],