From b6aabeab79529eb551e68b5d61c220ef8b13e5c0 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 8 Jan 2025 11:31:09 +0100 Subject: [PATCH 01/20] Update subjet handling. --- hbt/config/configs_hbt.py | 5 ++--- hbt/selection/jet.py | 28 +++++++++++----------------- modules/cmsdb | 2 +- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 7806ef79..b909b544 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -1206,9 +1206,8 @@ def add_external(name, value): ################################################################################################ # channels - # TODO: switch etau and mutau, also check if change needed in res_dnn's - cfg.add_channel(name="mutau", id=1) - cfg.add_channel(name="etau", id=2) + cfg.add_channel(name="etau", id=1) + cfg.add_channel(name="mutau", id=2) cfg.add_channel(name="tautau", id=3) cfg.add_channel(name="mumu", id=4) cfg.add_channel(name="emu", id=5) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index afefe759..4d17ac65 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -183,13 +183,15 @@ def jet_selection( # trigger leg matching for tautau events that were only triggered by a tau-tau-jet cross trigger # with two different strategies (still under scrutiny): # a) select the two highest scoring hhbjets of which one must match the jet leg - # (folds matching into the hhbjet identification itself) + # (folds matching decisions into the hhbjet identification itself) # b) _after_ selecting the two hhbjets, at least one of them must match the jet leg # (does the hhbjet identification first, and then filters using the matching) + + # create the mask select those events false_mask = ak.full_like(events.event, False, dtype=bool) ttj_mask = ( (events.channel_id == 3) & - ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttc], false_mask), axis=1) & + ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.op], false_mask), axis=1) & ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttjc], false_mask), axis=1) ) @@ -237,7 +239,7 @@ def jet_selection( flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) # validate that either none or two hhbjets were identified - assert len(set(ak.sum(hhbjet_mask, axis=1)) - {0, 2}) == 0 + assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) # # fat jets @@ -263,14 +265,10 @@ def jet_selection( axis=2, ) - # discard the event in case the (first) fatjet with matching subjets is found - # but they are not b-tagged (TODO: move to deepjet when available for subjets) - # TODO: is it correct to do this? for run 3 the pnet wp is compare against btagDeepB? - if self.config_inst.campaign.x.run == 3: - wp = self.config_inst.x.btag_working_points.particleNet.loose - else: - wp = self.config_inst.x.btag_working_points.deepcsv.loose - subjets_btagged = ak.all(events.SubJet[ak.firsts(subjet_indices)].btagDeepB > wp, axis=1) + # check subjet btags (only deepcsv available) + # note: skipped for now as we do not have a final strategy for run 3 yet + # wp = ... + # subjets_btagged = ak.all(events.SubJet[ak.firsts(subjet_indices)].btagDeepB > wp, axis=1) # # vbf jets @@ -343,12 +341,8 @@ def jet_selection( ascending=False, ) - # final event selection - jet_sel = ( - (ak.sum(default_mask, axis=1) >= 2) & - # TODO: do want this? 
-        ak.fill_none(subjets_btagged, True)  # was none for events with no matched fatjet
-    )
+    # final event selection (only looking at the number of default jets for now)
+    jet_sel = ak.sum(default_mask, axis=1) >= 2
 
     # some final type conversions
     jet_indices = ak.values_astype(ak.fill_none(jet_indices, 0), np.int32)
diff --git a/modules/cmsdb b/modules/cmsdb
index f324901a..54953603 160000
--- a/modules/cmsdb
+++ b/modules/cmsdb
@@ -1 +1 @@
-Subproject commit f324901a22acce34c1f8948dd60704b641716e93
+Subproject commit 549536034fdd0b20efbddc1adf4403a3713161ab

From 2484fc52b45a5107b6f19d48b282486d1bc4a9a7 Mon Sep 17 00:00:00 2001
From: Ana A
Date: Wed, 8 Jan 2025 16:45:26 +0100
Subject: [PATCH 02/20] removing method a) from hhbtag matching

---
 hbt/selection/jet.py | 72 ++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py
index 4d17ac65..ab3da841 100644
--- a/hbt/selection/jet.py
+++ b/hbt/selection/jet.py
@@ -180,18 +180,15 @@ def jet_selection(
     # deselect jets in events with less than two valid scores
     hhbjet_mask = hhbjet_mask & (ak.sum(hhbtag_scores != EMPTY_FLOAT, axis=1) >= 2)
 
-    # trigger leg matching for tautau events that were only triggered by a tau-tau-jet cross trigger
-    # with two different strategies (still under scrutiny):
-    # a) select the two highest scoring hhbjets of which one must match the jet leg
-    #    (folds matching decisions into the hhbjet identification itself)
-    # b) _after_ selecting the two hhbjets, at least one of them must match the jet leg
-    #    (does the hhbjet identification first, and then filters using the matching)
-
-    # create the mask select those events
+    # trigger leg matching for tautau events that were only triggered by a tau-tau-jet cross trigger;
+    # two strategies, a) and b), were studied, but strategy a) appears not to comply with how trigger
+    # matching should be done and is therefore ignored.
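+    # (reasoning, in brief: folding the leg matching into the object choice, as in strategy a),
+    # can promote a lower-scoring jet into the hhbjet pair just because it matched the trigger
+    # leg, biasing the hhbjet identification itself; strategy b) identifies the pair first and
+    # only uses the matching to accept or reject whole events)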
+
+    # create a mask to select tautau events that were only triggered by a tau-tau-jet cross trigger
     false_mask = ak.full_like(events.event, False, dtype=bool)
     ttj_mask = (
         (events.channel_id == 3) &
-        ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.op], false_mask), axis=1) &
+        ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttc], false_mask), axis=1) &
         ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttjc], false_mask), axis=1)
     )
 
@@ -209,35 +206,52 @@ def jet_selection(
     # constrain to jets with a score
     matching_mask = matching_mask & (hhbjet_mask[ttj_mask] != EMPTY_FLOAT)
 
+    #
     # a)
-    # sort matching masks by score first
-    sel_score_indices = score_indices[ttj_mask]
-    sorted_matching_mask = matching_mask[sel_score_indices]
-    # get the position of the highest scoring _and_ matched hhbjet
-    # (this hhbet is guaranteed to be selected)
-    sel_li = ak.local_index(sorted_matching_mask)
-    matched_idx = ak.firsts(sel_li[sorted_matching_mask], axis=1)
-    # the other hhbjet is not required to be matched and is either at the 0th or 1st position
-    # (depending on whether the matched one had the highest score)
-    other_idx = ak.where(matched_idx == 0, 1, 0)
-    # use comparisons between selected indices and the local index to convert back into a mask
-    # and check again that both hhbjets have a score
-    sel_hhbjet_mask = (
-        (sel_li == ak.fill_none(sel_score_indices[matched_idx[..., None]][..., 0], -1)) |
-        (sel_li == ak.fill_none(sel_score_indices[other_idx[..., None]][..., 0], -1))
-    ) & (hhbjet_mask[ttj_mask] != EMPTY_FLOAT)
-
+    # two hhb-tagged jets must be selected. The highest scoring jet is always selected.
+    #    - If this jet happens to match the trigger leg, then the second highest scoring jet is also selected.
+    #    - If this is not the case, then the highest scoring jet that matches the trigger leg is selected.
+    # ! Note: apparently the official recommendation is that trigger matching should only be used
+    # to select full events and not for individual object selection. Thus, this strategy introduces a bias.
+    #
+
+    # # sort matching masks by score first
+    # sel_score_indices = score_indices[ttj_mask]
+    # sorted_matching_mask = matching_mask[sel_score_indices]
+    # # get the position of the highest scoring _and_ matched hhbjet
+    # # (this hhbjet is guaranteed to be selected)
+    # sel_li = ak.local_index(sorted_matching_mask)
+    # matched_idx = ak.firsts(sel_li[sorted_matching_mask], axis=1)
+    # # the other hhbjet is not required to be matched and is either at the 0th or 1st position
+    # # (depending on whether the matched one had the highest score)
+    # other_idx = ak.where(matched_idx == 0, 1, 0)
+    # # use comparisons between selected indices and the local index to convert back into a mask
+    # # and check again that both hhbjets have a score
+    # sel_hhbjet_mask = (
+    #     (sel_li == ak.fill_none(sel_score_indices[matched_idx[..., None]][..., 0], -1)) |
+    #     (sel_li == ak.fill_none(sel_score_indices[other_idx[..., None]][..., 0], -1))
+    # ) & (hhbjet_mask[ttj_mask] != EMPTY_FLOAT)
+
+    #
+    # b)
+    # two hhb-tagged jets must be selected. The highest and second-highest scoring jets are selected.
+    #    - If any of those matches the trigger leg, the event is accepted.
+    #    - If none of them matches the trigger leg, the event is rejected.
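+    # illustrative sketch of strategy b) with toy values (hypothetical masks for two events):
+    #   hhbjet_mask[ttj_mask]                            -> [[True, True, False], [True, True]]
+    #   matching_mask                                    -> [[False, True, False], [False, False]]
+    #   one_matched = ak.any(matching_mask[...], axis=1) -> [True, False]
+    #   sel_hhbjet_mask & one_matched                    -> [[True, True, False], [False, False]]
+    # i.e. the first event keeps both hhbjets, while the second event retains none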
+ # + # check if any of the two jets is matched and fold back into hhbjet_mask (brodcasted) - # sel_hhbjet_mask = ak.Array(hhbjet_mask[ttj_mask]) - # one_matched = ak.any(matching_mask[sel_hhbjet_mask], axis=1) - # sel_hhbjet_mask = sel_hhbjet_mask & one_matched + sel_hhbjet_mask = ak.Array(hhbjet_mask[ttj_mask]) + one_matched = ak.any(matching_mask[sel_hhbjet_mask], axis=1) + sel_hhbjet_mask = sel_hhbjet_mask & one_matched # insert back into the full hhbjet_mask flat_hhbjet_mask = flat_np_view(hhbjet_mask) flat_jet_mask = ak.flatten(ak.full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) + from IPython import embed; + embed() + # validate that either none or two hhbjets were identified assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) From e5cec17ab5de7183ac6b4ae12e59b31c99ccd1c8 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 8 Jan 2025 18:55:26 +0100 Subject: [PATCH 03/20] Fix lepton / trigger matching. --- hbt/categorization/default.py | 5 + hbt/config/categories.py | 9 +- hbt/config/configs_hbt.py | 11 +- hbt/config/triggers.py | 98 ++--- hbt/selection/jet.py | 7 +- hbt/selection/lepton.py | 773 ++++++++++++++++++---------------- 6 files changed, 486 insertions(+), 417 deletions(-) diff --git a/hbt/categorization/default.py b/hbt/categorization/default.py index c86e40a4..17391df5 100644 --- a/hbt/categorization/default.py +++ b/hbt/categorization/default.py @@ -30,6 +30,11 @@ def cat_tautau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, return events, events.channel_id == self.config_inst.channels.n.tautau.id +@categorizer(uses={"channel_id"}) +def cat_ee(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ee.id + + @categorizer(uses={"channel_id"}) def cat_mumu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: return events, events.channel_id == self.config_inst.channels.n.mumu.id diff --git a/hbt/config/categories.py b/hbt/config/categories.py index dae03186..b86371f0 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -19,8 +19,9 @@ def add_categories(config: od.Config) -> None: add_category(config, name="etau", id=1, selection="cat_etau", label=r"$e\tau_{h}$") add_category(config, name="mutau", id=2, selection="cat_mutau", label=r"$\mu\tau_{h}$") add_category(config, name="tautau", id=3, selection="cat_tautau", label=r"$\tau_{h}\tau_{h}$") - add_category(config, name="mumu", id=4, selection="cat_mumu", label=r"$\mu\mu$") - add_category(config, name="emu", id=5, selection="cat_emu", label=r"$e\mu$") + add_category(config, name="ee", id=4, selection="cat_ee", label=r"$ee$") + add_category(config, name="mumu", id=5, selection="cat_mumu", label=r"$\mu\mu$") + add_category(config, name="emu", id=6, selection="cat_emu", label=r"$e\mu$") # qcd regions add_category(config, name="os", id=10, selection="cat_os", label="Opposite sign", tags={"os"}) @@ -75,7 +76,9 @@ def kwargs_fn(categories, add_qcd_group=True): # control categories control_categories = { # channels first - "channel": [config.get_category("mumu"), config.get_category("emu")], + "channel": [ + config.get_category("ee"), config.get_category("mumu"), config.get_category("emu"), + ], # kinematic regions in the middle (to be extended) "kin": [config.get_category("incl"), config.get_category("2j")], # relative sign last diff --git a/hbt/config/configs_hbt.py 
b/hbt/config/configs_hbt.py index b909b544..fadd094b 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -342,7 +342,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # add tags to datasets if dataset.name.startswith("data_e_"): - dataset.add_tag({"etau", "emu_from_e"}) + dataset.add_tag({"etau", "emu_from_e", "ee"}) if dataset.name.startswith("data_mu_"): dataset.add_tag({"mutau", "emu_from_mu", "mumu"}) if dataset.name.startswith("data_tau_"): @@ -428,8 +428,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] "ewk", ]), "dy_split": [ - "dy_m4to10", "dy_m10to50", "dy_m50toinf", - "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", + # "dy_m4to10", "dy_m10to50", "dy_m50toinf", + # "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", "dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", "dy_m50toinf_1j_pt200to400", "dy_m50toinf_1j_pt400to600", "dy_m50toinf_1j_pt600toinf", "dy_m50toinf_2j_pt40to100", "dy_m50toinf_2j_pt100to200", "dy_m50toinf_2j_pt200to400", @@ -1209,8 +1209,9 @@ def add_external(name, value): cfg.add_channel(name="etau", id=1) cfg.add_channel(name="mutau", id=2) cfg.add_channel(name="tautau", id=3) - cfg.add_channel(name="mumu", id=4) - cfg.add_channel(name="emu", id=5) + cfg.add_channel(name="ee", id=4) + cfg.add_channel(name="mumu", id=5) + cfg.add_channel(name="emu", id=6) # add categories from hbt.config.categories import add_categories diff --git a/hbt/config/triggers.py b/hbt/config/triggers.py index 0e482ceb..0fc77340 100644 --- a/hbt/config/triggers.py +++ b/hbt/config/triggers.py @@ -258,7 +258,7 @@ def add_triggers_2016(config: od.Config) -> None: # does not exist for run F on but should only be used until run 276215 -> which era? # TODO: to be checked applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era <= "E"), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), Trigger( name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20", @@ -282,7 +282,7 @@ def add_triggers_2016(config: od.Config) -> None: # does not exist for run F on but should only be used between run 276215 and 278270 -> which eras? # TODO: to be checked applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era <= "E"), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), Trigger( name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau30", @@ -306,7 +306,7 @@ def add_triggers_2016(config: od.Config) -> None: # does not exist until run E but should only be used after run 278270 -> which era? 
# TODO: to be checked applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era >= "E"), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -331,7 +331,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), Trigger( name="HLT_IsoMu19_eta2p1_LooseIsoPFTau20_SingleL1", @@ -352,7 +352,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -378,7 +378,7 @@ def add_triggers_2016(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or ("B" <= dataset_inst.x.era <= "F")), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleMediumCombinedIsoPFTau35_Trk1_eta2p1_Reg", @@ -400,7 +400,7 @@ def add_triggers_2016(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "H"), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -425,7 +425,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ) # # single muon @@ -442,7 +442,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) config.x.triggers.add( name="HLT_IsoMu22_eta2p1", @@ -456,7 +456,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) config.x.triggers.add( name="HLT_IsoTkMu22", @@ -470,7 +470,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) config.x.triggers.add( name="HLT_IsoTkMu22_eta2p1", @@ -484,7 +484,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) @@ -510,7 +510,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), Trigger( name="HLT_Ele32_WPTight_Gsf_L1DoubleEG", @@ -525,7 +525,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2 + 1024, ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), Trigger( name="HLT_Ele35_WPTight_Gsf", @@ -539,7 +539,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -557,7 +557,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), Trigger( name="HLT_IsoMu27", @@ -571,7 +571,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2, ), ), - 
tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -598,7 +598,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=1024 + 256, ), ), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -625,7 +625,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=1024 + 512, ), ), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -650,7 +650,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=64, ), ), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg", @@ -672,7 +672,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", @@ -694,7 +694,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg", @@ -716,7 +716,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -756,7 +756,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), ]) @@ -779,7 +779,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), Trigger( name="HLT_Ele35_WPTight_Gsf", @@ -793,7 +793,7 @@ def add_triggers_2018(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -811,7 +811,7 @@ def add_triggers_2018(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), Trigger( name="HLT_IsoMu27", @@ -825,7 +825,7 @@ def add_triggers_2018(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -854,7 +854,7 @@ def add_triggers_2018(config: od.Config) -> None: ), # the non-HPS path existed only for data and is fully covered in MC below applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -883,7 +883,7 @@ def add_triggers_2018(config: od.Config) -> None: ), # the non-HPS path existed only for data and is fully covered in MC below applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -909,7 +909,7 @@ def 
add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", @@ -931,7 +931,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg", @@ -953,7 +953,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -992,7 +992,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), ]) @@ -1030,10 +1030,11 @@ def add_triggers_2022(config: od.Config) -> None: applies_to_dataset=(lambda dataset_inst: ( dataset_inst.is_mc or dataset_inst.has_tag("etau") or + dataset_inst.has_tag("ee") or dataset_inst.has_tag("emu_from_e") or dataset_inst.has_tag("emu_from_mu") )), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -1061,7 +1062,7 @@ def add_triggers_2022(config: od.Config) -> None: dataset_inst.has_tag("emu_from_mu") or dataset_inst.has_tag("mumu") )), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -1095,7 +1096,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("etau")), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -1130,7 +1131,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("mutau")), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -1164,7 +1165,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -1220,7 +1221,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), # Currently disabled since it may not be needed @@ -1278,7 +1279,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_jet"}, ), ]) @@ -1314,10 +1315,11 @@ def add_triggers_2023(config: od.Config) -> None: applies_to_dataset=(lambda dataset_inst: ( dataset_inst.is_mc or dataset_inst.has_tag("etau") or + dataset_inst.has_tag("ee") or dataset_inst.has_tag("emu_from_e") or dataset_inst.has_tag("emu_from_mu") )), - tags={"single_trigger", 
"single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -1345,7 +1347,7 @@ def add_triggers_2023(config: od.Config) -> None: dataset_inst.has_tag("emu_from_mu") or dataset_inst.has_tag("mumu") )), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -1379,7 +1381,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("etau")), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -1414,7 +1416,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("mutau")), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -1448,7 +1450,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -1502,7 +1504,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), # @@ -1541,6 +1543,6 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_jet"}, ), ]) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index ab3da841..a0afcca8 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -249,9 +249,6 @@ def jet_selection( flat_jet_mask = ak.flatten(ak.full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) - from IPython import embed; - embed() - # validate that either none or two hhbjets were identified assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) @@ -424,9 +421,9 @@ def jet_selection_setup(self: Selector, reqs: dict, inputs: dict, reader_targets # store ids of tau-tau-jet and tau-tau cross triggers self.trigger_ids_ttc = [ trigger.id for trigger in self.config_inst.x.triggers - if trigger.has_tag("channel_tau_tau") and not trigger.has_tag("cross_tau_tau_jet") + if trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf"}) ] self.trigger_ids_ttjc = [ trigger.id for trigger in self.config_inst.x.triggers - if trigger.has_tag("channel_tau_tau") and trigger.has_tag("cross_tau_tau_jet") + if trigger.has_tag("cross_tau_tau_jet") ] diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index d7b14823..6c04ec7b 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -12,7 +12,7 @@ from functools import reduce from columnflow.selection import Selector, SelectionResult, selector -from columnflow.columnar_util import set_ak_column +from columnflow.columnar_util import set_ak_column, sorted_indices_from_mask from columnflow.util import maybe_import from hbt.util import IF_NANO_V9, IF_NANO_GE_V10 @@ -70,9 +70,8 @@ def update_channel_ids( @selector( uses={ "Electron.{pt,eta,phi,dxy,dz,pfRelIso03_all}", - IF_NANO_V9("Electron.mvaFall17V2{Iso_WP80,Iso_WP90,noIso_WP90}"), - 
IF_NANO_GE_V10("Electron.{mvaIso_WP80,mvaIso_WP90,mvaNoIso_WP90}"), - "TrigObj.{pt,eta,phi}", + IF_NANO_V9("Electron.mvaFall17V2{Iso_WP80,Iso_WP90}"), + IF_NANO_GE_V10("Electron.{mvaIso_WP80,mvaIso_WP90}"), }, exposed=False, ) @@ -80,36 +79,15 @@ def electron_selection( self: Selector, events: ak.Array, trigger: Trigger, - leg_masks: dict[str, ak.Array], - trigger_fire_list: list[bool] | None = None, **kwargs, -) -> tuple[ak.Array, ak.Array]: +) -> tuple[ak.Array | None, ak.Array]: """ - Electron selection returning two sets of indices for default and veto electrons. + Electron selection returning two sets of masks for default and veto electrons. See https://twiki.cern.ch/twiki/bin/view/CMS/EgammaNanoAOD?rev=4 - If the trigger_fire_list is given, all events that are not fired by the trigger lose their trigger - matching, i.e. the trigger object matching is set to True. This is useful for the emu channel. """ + is_2016 = self.config_inst.campaign.x.year == 2016 is_single = trigger.has_tag("single_e") is_cross = trigger.has_tag("cross_e_tau") - is_2016 = self.config_inst.campaign.x.year == 2016 - - # start per-electron mask with trigger object matching - if is_single: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 1 - assert abs(trigger.legs["e"].pdg_id) == 11 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) - elif is_cross: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["e"].pdg_id) == 11 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) - - # pt sorted indices for converting masks to indices - sorted_indices = ak.argsort(events.Electron.pt, axis=-1, ascending=False) # obtain mva flags, which might be located at different routes, depending on the nano version if "mvaIso_WP80" in events.Electron.fields: @@ -118,124 +96,92 @@ def electron_selection( # check this in original root files if necessary mva_iso_wp80 = events.Electron.mvaIso_WP80 mva_iso_wp90 = events.Electron.mvaIso_WP90 - # mva_noniso_wp90 = events.Electron.mvaNoIso_WP90 else: # <= nano v9 mva_iso_wp80 = events.Electron.mvaFall17V2Iso_WP80 mva_iso_wp90 = events.Electron.mvaFall17V2Iso_WP90 - # mva_noniso_wp90 = events.Electron.mvaFall17V2noIso_WP90 - # default electron mask, only required for single and cross triggers with electron leg + # default electron mask default_mask = None - default_indices = None - - # add true to the leg mask if the trigger is not fired for the emu channel only case, - # where trigger_fire_list should be given - if trigger_fire_list is not None: - matches_leg0 = ak.where(trigger_fire_list, events.Electron.pt > -1, events.Electron.pt < -1) - if is_single or is_cross or (trigger_fire_list is not None): + if is_single or is_cross: min_pt = 26.0 if is_2016 else (31.0 if is_single else 25.0) + max_eta = 2.5 if is_single else 2.1 default_mask = ( (mva_iso_wp80 == 1) & - (abs(events.Electron.eta) < 2.5) if is_single else (abs(events.Electron.eta) < 2.1) & + (abs(events.Electron.eta) < max_eta) & (abs(events.Electron.dxy) < 0.045) & (abs(events.Electron.dz) < 0.2) & - (events.Electron.pt > min_pt) & - matches_leg0 + (events.Electron.pt > min_pt) ) - # convert to sorted indices - default_indices = sorted_indices[default_mask[sorted_indices]] - default_indices = ak.values_astype(default_indices, np.int32) - # veto electron mask + # veto electron mask (must be trigger independent!) 
veto_mask = ( - ( - (mva_iso_wp90 == 1) | - False - # TODO: do we need to keep this? - # disabled as part of the resonant synchronization effort - # ((mva_noniso_wp90 == 1) & (events.Electron.pfRelIso03_all < 0.3)) - ) & + (mva_iso_wp90 == 1) & (abs(events.Electron.eta) < 2.5) & (abs(events.Electron.dxy) < 0.045) & (abs(events.Electron.dz) < 0.2) & (events.Electron.pt > 10.0) ) - # convert to sorted indices - veto_indices = sorted_indices[veto_mask[sorted_indices]] - veto_indices = ak.values_astype(veto_indices, np.int32) - return default_indices, veto_indices + return default_mask, veto_mask @selector( - uses={ - # nano columns - "Muon.{pt,eta,phi,mediumId,tightId,pfRelIso04_all,dxy,dz}", - "TrigObj.{pt,eta,phi}", - }, + uses={"{Electron,TrigObj}.{pt,eta,phi}"}, exposed=False, ) -def muon_selection( +def electron_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, leg_masks: dict[str, ak.Array], - select_without_trigger: bool = False, - mumu_selection: bool = False, **kwargs, -) -> tuple[ak.Array, ak.Array]: +) -> tuple[ak.Array]: """ - Muon selection returning two sets of indidces for default and veto muons. + Electron trigger matching. + """ + # TODO: only perform this for events where the trigger fired, needs more info and flat_views + is_single = trigger.has_tag("single_e") + is_cross = trigger.has_tag("cross_e_tau") + + # catch config errors + assert is_single or is_cross + assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) + assert abs(trigger.legs["e"].pdg_id) == 11 + + return trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) + + +@selector( + uses={"Muon.{pt,eta,phi,mediumId,tightId,pfRelIso04_all,dxy,dz}"}, + exposed=False, +) +def muon_selection( + self: Selector, + events: ak.Array, + trigger: Trigger, + **kwargs, +) -> tuple[ak.Array | None, ak.Array]: + """ + Muon selection returning two sets of masks for default and veto muons. References: - Isolation working point: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2?rev=59 - ID und ISO : https://twiki.cern.ch/twiki/bin/view/CMS/MuonUL2017?rev=15 - - If mumu_selection is set to True, a second muon is selected and the corresponding indices are additionally returned. """ + is_2016 = self.config_inst.campaign.x.year == 2016 is_single = trigger.has_tag("single_mu") is_cross = trigger.has_tag("cross_mu_tau") - is_2016 = self.config_inst.campaign.x.year == 2016 - - if (is_single or is_cross or mumu_selection) and select_without_trigger: - raise ValueError("select_without_trigger can only be used for non-triggered muon selections") - if is_cross and mumu_selection: - raise ValueError("mumu_selection can only be used for single muon selections") - - # start per-muon mask with trigger object matching - if is_single: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 1 - assert abs(trigger.legs["mu"].pdg_id) == 13 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) - elif is_cross: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["mu"].pdg_id) == 13 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) - elif select_without_trigger: - matches_leg0 = events.Muon.pt > -1 - - # pt sorted indices for converting masks to indices - # TODO: Why not local index? 
they should be sorted anyway so yes the results should be the same - # but if there is a problem with the sorting, we are selecting the wrong muons, since the - # masks below are created with the default sorting - sorted_indices = ak.argsort(events.Muon.pt, axis=-1, ascending=False) - - # default muon mask, only required for single and cross triggers with muon leg + # default muon mask default_mask = None - default_indices = None - if is_single or is_cross or select_without_trigger: + if is_single or is_cross: if is_2016: min_pt = 23.0 if is_single else 20.0 else: min_pt = 26.0 if is_single else 22.0 - default_mask_wo_trigger = ( + default_mask = ( (events.Muon.tightId == 1) & (abs(events.Muon.eta) < 2.4) & (abs(events.Muon.dxy) < 0.045) & @@ -244,43 +190,7 @@ def muon_selection( (events.Muon.pt > min_pt) ) - if mumu_selection: - # for mumu selection, the matched muon should the one with the highest pt among the two - # otherwise selected muons. We check the number of muons only afterwards but should still - # apply the trigger matching to only selected muons - - # first mask the array with the default mask without trigger, the non-matched muon will - # become None and will return False in the trigger matching and be put at the end of the - # list when sorted by pt - masked_muons = ak.mask(events.Muon, default_mask_wo_trigger) - - # then sort the selected muons by pt to push the non-selected ones to the end - sorted_masked_muons = masked_muons[ak.argsort(masked_muons.pt, ascending=False)] - - # apply trigger matching to the first muon of the selected muons - matches_first_selected_muons = ak.where( - ak.local_index(sorted_masked_muons) == 0, - trigger_object_matching(sorted_masked_muons, events.TrigObj[leg_masks["mu"]]), - False, - ) - - # bring the muons back in the original position in the array - matches_leg0 = matches_first_selected_muons[ak.argsort(ak.argsort(masked_muons.pt, ascending=False))] - - # for mumu selection, the second muon should not be the same as the first one - matches_second_muon = ~matches_leg0 - default_mask_second_muon = default_mask_wo_trigger & matches_second_muon - - default_mask = default_mask_wo_trigger & matches_leg0 - - # convert to sorted indices - default_indices = sorted_indices[default_mask[sorted_indices]] - default_indices = ak.values_astype(default_indices, np.int32) - if mumu_selection: - default_indices_second_muon = sorted_indices[default_mask_second_muon[sorted_indices]] - default_indices_second_muon = ak.values_astype(default_indices_second_muon, np.int32) - - # veto muon mask + # veto muon mask (must be trigger independent!) veto_mask = ( ((events.Muon.mediumId == 1) | (events.Muon.tightId == 1)) & (abs(events.Muon.eta) < 2.4) & @@ -289,14 +199,34 @@ def muon_selection( (events.Muon.pfRelIso04_all < 0.3) & (events.Muon.pt > 10) ) - # convert to sorted indices - veto_indices = sorted_indices[veto_mask[sorted_indices]] - veto_indices = ak.values_astype(veto_indices, np.int32) - if mumu_selection: - return default_indices, default_indices_second_muon, veto_indices - else: - return default_indices, veto_indices + return default_mask, veto_mask + + +@selector( + uses={"{Muon,TrigObj}.{pt,eta,phi}"}, + exposed=False, +) +def muon_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Muon trigger matching. 
+ """ + # TODO: only perform this for events where the trigger fired, needs more info and flat_views + is_single = trigger.has_tag("single_mu") + is_cross = trigger.has_tag("cross_mu_tau") + + # catch config errors + assert is_single or is_cross + assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) + assert abs(trigger.legs["mu"].pdg_id) == 13 + + return trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) @selector( @@ -311,22 +241,20 @@ def tau_selection( self: Selector, events: ak.Array, trigger: Trigger, - leg_masks: dict[str, ak.Array], - electron_indices: ak.Array, - muon_indices: ak.Array, + electron_mask: ak.Array | None, + muon_mask: ak.Array | None, **kwargs, ) -> tuple[ak.Array, ak.Array]: """ - Tau selection returning a set of indices for taus that are at least VVLoose isolated (vs jet) - and a second mask to select the action Medium isolated ones, eventually to separate normal and - iso inverted taus for QCD estimations. + Tau selection returning a masks for taus that are at least VVLoose isolated (vs jet) + and a second mask to select isolated ones, eventually to separate normal and iso inverted taus + for QCD estimations. """ # return empty mask if no tagged taus exists in the chunk if ak.all(ak.num(events.Tau) == 0): logger.info("no taus found in event chunk") - # convenient definition of empty indices and iso mask - empty_indices = ak.local_index(events.Tau) - return empty_indices, empty_indices == 0 + false_mask = ak.full_like(events.Tau.pt, False, dtype=bool) + return false_mask, false_mask is_single_e = trigger.has_tag("single_e") is_single_mu = trigger.has_tag("single_mu") @@ -335,29 +263,11 @@ def tau_selection( is_cross_tau = trigger.has_tag("cross_tau_tau") is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") - is_any_cross_tau = is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet is_2016 = self.config_inst.campaign.x.year == 2016 is_run3 = self.config_inst.campaign.x.run == 3 get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" - wp_config = self.config_inst.x.tau_id_working_points - # start per-tau mask with trigger object matching per leg - if is_cross_e or is_cross_mu: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["tau"].pdg_id) == 15 - # match leg 1 - matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau"]]) - elif is_any_cross_tau: - # catch config errors - assert trigger.n_legs == len(leg_masks) >= 2 - assert abs(trigger.legs["tau1"].pdg_id) == 15 - assert abs(trigger.legs["tau2"].pdg_id) == 15 - # match both legs - matches_leg0 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau1"]]) - matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau2"]]) - # determine minimum pt and maximum eta max_eta = 2.5 if is_single_e or is_single_mu: @@ -375,59 +285,83 @@ def tau_selection( elif is_cross_tau_jet: min_pt = None if not is_run3 else 35.0 - # select which decay modes to consider - decay_mode_mask = reduce(or_, - [ - events.Tau.decayMode == 0, - events.Tau.decayMode == 1, - events.Tau.decayMode == 10, - events.Tau.decayMode == 11, - ], - ) - # base tau mask for default and qcd sideband tau base_mask = ( (abs(events.Tau.eta) < max_eta) & (events.Tau.pt > min_pt) & (abs(events.Tau.dz) < 0.2) & - decay_mode_mask & + reduce(or_, [events.Tau.decayMode == mode for mode in (0, 1, 10, 11)]) & (events.Tau[get_tau_tagger("e")] >= 
wp_config.tau_vs_e.vloose) & (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) & (events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.vvvloose) ) # remove taus with too close spatial separation to previously selected leptons - if electron_indices is not None: - base_mask = base_mask & ak.all(events.Tau.metric_table(events.Electron[electron_indices]) > 0.5, axis=2) - if muon_indices is not None: - base_mask = base_mask & ak.all(events.Tau.metric_table(events.Muon[muon_indices]) > 0.5, axis=2) + if electron_mask is not None: + base_mask = base_mask & ak.all(events.Tau.metric_table(events.Electron[electron_mask]) > 0.5, axis=2) + if muon_mask is not None: + base_mask = base_mask & ak.all(events.Tau.metric_table(events.Muon[muon_mask]) > 0.5, axis=2) - # add trigger object masks - if is_cross_e or is_cross_mu: - base_mask = base_mask & matches_leg1 - elif is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet: - # taus need to be matched to at least one leg, but as a side condition - # each leg has to have at least one match to a tau - base_mask = base_mask & ( - (matches_leg0 | matches_leg1) & - ak.any(matches_leg0, axis=1) & - ak.any(matches_leg1, axis=1) - ) + # compute the isolation mask separately as it is used to defined (qcd) categories later on + iso_mask = events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.medium - # indices for sorting first by isolation, then by pt - # for this, combine iso and pt values, e.g. iso 255 and pt 32.3 -> 2550032.3 - f = 10**(np.ceil(np.log10(ak.max(events.Tau.pt))) + 1) - sort_key = events.Tau[get_tau_tagger("jet")] * f + events.Tau.pt - sorted_indices = ak.argsort(sort_key, axis=-1, ascending=False) + return base_mask, iso_mask - # convert to sorted indices - base_indices = sorted_indices[base_mask[sorted_indices]] - base_indices = ak.values_astype(base_indices, np.int32) - # additional mask to select final, Medium isolated taus - iso_mask = events.Tau[base_indices][get_tau_tagger("jet")] >= wp_config.tau_vs_jet.medium +@selector( + uses={"{Tau,TrigObj}.{pt,eta,phi}"}, + # shifts are declared dynamically below in tau_selection_init + exposed=False, +) +def tau_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Tau trigger matching. 
+ """ + # TODO: only perform this for events where the trigger fired, needs more info and flat_views + if ak.all(ak.num(events.Tau) == 0): + logger.info("no taus found in event chunk") + return ak.full_like(events.Tau.pt, False, dtype=bool) - return base_indices, iso_mask + is_cross_e = trigger.has_tag("cross_e_tau") + is_cross_mu = trigger.has_tag("cross_mu_tau") + is_cross_tau = trigger.has_tag("cross_tau_tau") + is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") + is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") + is_any_cross_tau = is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet + assert is_cross_e or is_cross_mu or is_any_cross_tau + + # start per-tau mask with trigger object matching per leg + if is_cross_e or is_cross_mu: + # catch config errors + assert trigger.n_legs == len(leg_masks) == 2 + assert abs(trigger.legs["tau"].pdg_id) == 15 + # match leg 1 + return trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau"]]) + + # is_any_cross_tau + # catch config errors + assert trigger.n_legs == len(leg_masks) >= 2 + assert abs(trigger.legs["tau1"].pdg_id) == 15 + assert abs(trigger.legs["tau2"].pdg_id) == 15 + + # match both legs + matches_leg0 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau1"]]) + matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau2"]]) + # taus need to be matched to at least one leg, but as a side condition + # each leg has to have at least one match to a tau + matches = ( + (matches_leg0 | matches_leg1) & + ak.any(matches_leg0, axis=1) & + ak.any(matches_leg1, axis=1) + ) + + return matches @tau_selection.init @@ -448,11 +382,13 @@ def tau_selection_init(self: Selector) -> None: @selector( uses={ - electron_selection, muon_selection, tau_selection, + electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, + tau_selection, tau_trigger_matching, "event", "{Electron,Muon,Tau}.{charge,mass}", }, produces={ - electron_selection, muon_selection, tau_selection, + electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, + tau_selection, tau_trigger_matching, # new columns "channel_id", "leptons_os", "tau2_isolated", "single_triggered", "cross_triggered", }, @@ -470,6 +406,7 @@ def lepton_selection( ch_etau = self.config_inst.get_channel("etau") ch_mutau = self.config_inst.get_channel("mutau") ch_tautau = self.config_inst.get_channel("tautau") + ch_ee = self.config_inst.get_channel("ee") ch_mumu = self.config_inst.get_channel("mumu") ch_emu = self.config_inst.get_channel("emu") @@ -480,12 +417,17 @@ def lepton_selection( leptons_os = false_mask single_triggered = false_mask cross_triggered = false_mask - empty_indices = events.Tau[:, :0].charge * 1 # ak.zeros_like(1 * events.event, dtype=np.uint16)[..., None][..., :0] - sel_electron_indices = empty_indices - sel_muon_indices = empty_indices - sel_tau_indices = empty_indices + sel_electron_mask = ak.full_like(events.Electron.pt, False, dtype=bool) + sel_muon_mask = ak.full_like(events.Muon.pt, False, dtype=bool) + sel_tau_mask = ak.full_like(events.Tau.pt, False, dtype=bool) leading_taus = events.Tau[:, :0] + # indices for sorting taus first by isolation, then by pt + # for this, combine iso and pt values, e.g. 
iso 255 and pt 32.3 -> 2550032.3 + f = 10**(np.ceil(np.log10(ak.max(events.Tau.pt))) + 2) + tau_sorting_key = events.Tau[f"raw{self.config_inst.x.tau_tagger}VSjet"] * f + events.Tau.pt + tau_sorting_indices = ak.argsort(tau_sorting_key, axis=-1, ascending=False) + # perform each lepton election step separately per trigger, avoid caching sel_kwargs = {**kwargs, "call_force": True} for trigger, trigger_fired, leg_masks in trigger_results.x.trigger_data: @@ -493,242 +435,350 @@ def lepton_selection( is_cross = trigger.has_tag("cross_trigger") # electron selection - electron_indices, electron_veto_indices = self[electron_selection]( + electron_mask, electron_veto_mask = self[electron_selection]( events, trigger, - leg_masks, **sel_kwargs, ) # muon selection - muon_indices, muon_veto_indices = self[muon_selection]( + muon_mask, muon_veto_mask = self[muon_selection]( events, trigger, - leg_masks, **sel_kwargs, ) # tau selection - tau_indices, tau_iso_mask = self[tau_selection]( + tau_mask, tau_iso_mask = self[tau_selection]( events, trigger, - leg_masks, - electron_indices, - muon_indices, + electron_mask, + muon_mask, **sel_kwargs, ) - # lepton pair selecton per trigger via lepton counting - + # conditions potentially leading to etau channel if trigger.has_tag({"single_e", "cross_e_tau"}) and ( self.dataset_inst.is_mc or self.dataset_inst.has_tag("etau") ): + # fold trigger matching into the selection + trig_electron_mask = ( + electron_mask & + self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + trig_tau_mask = tau_mask + if trigger.has_tag("cross_e_tau"): + trig_tau_mask = ( + trig_tau_mask & + self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + + # check if the most isolated tau among the selected ones is matched + first_tau_matched = ak.fill_none( + ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1), + False, + ) + # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau + # without and with trigger matching on the default objects is_etau = ( trigger_fired & - (ak.num(electron_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 1) + (ak.sum(electron_mask, axis=1) == 1) & + (ak.sum(trig_electron_mask, axis=1) == 1) & + (ak.sum(electron_veto_mask, axis=1) == 1) & + (ak.sum(muon_veto_mask, axis=1) == 0) & + first_tau_matched ) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 - # determine the os/ss charge sign relation - e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) - tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + + # get selected taus and sort them + # (this will be correct for events for which is_etau is actually True) + sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] + # determine the relative charge and tau2 isolation + e_charge = ak.firsts(events.Electron[trig_electron_mask].charge, axis=1) + tau_charge = ak.firsts(sorted_sel_taus.charge, axis=1) is_os = e_charge == -tau_charge + is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 1 # store global variables channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) leptons_os = ak.where(is_etau, is_os, leptons_os) single_triggered = ak.where(is_etau & is_single, True, single_triggered) cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) - sel_electron_indices 
= ak.where(is_etau, electron_indices, sel_electron_indices)
-            sel_tau_indices = ak.where(is_etau, tau_indices, sel_tau_indices)
-            leading_taus = ak.where(is_etau, events.Tau[tau_indices[:, :1]], leading_taus)
+            sel_electron_mask = ak.where(is_etau, trig_electron_mask, sel_electron_mask)
+            sel_tau_mask = ak.where(is_etau, trig_tau_mask, sel_tau_mask)
+            leading_taus = ak.where(is_etau, sorted_sel_taus[:, :1], leading_taus)
 
-        elif trigger.has_tag({"single_mu", "cross_mu_tau"}) and (
+        # mutau channel
+        if trigger.has_tag({"single_mu", "cross_mu_tau"}) and (
             self.dataset_inst.is_mc or
             self.dataset_inst.has_tag("mutau")
         ):
+            # fold trigger matching into the selection
+            trig_muon_mask = (
+                muon_mask &
+                self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs)
+            )
+            trig_tau_mask = tau_mask
+            if trigger.has_tag("cross_mu_tau"):
+                trig_tau_mask = (
+                    trig_tau_mask &
+                    self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs)
+                )
+
+            # check if the most isolated tau among the selected ones is matched
+            first_tau_matched = ak.fill_none(
+                ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1),
+                False,
+            )
+
             # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau
+            # without and with trigger matching on the default objects
             is_mutau = (
                 trigger_fired &
-                (ak.num(muon_indices, axis=1) == 1) &
-                (ak.num(muon_veto_indices, axis=1) == 1) &
-                (ak.num(electron_veto_indices, axis=1) == 0) &
-                (ak.num(tau_indices, axis=1) >= 1)
+                (ak.sum(muon_mask, axis=1) == 1) &
+                (ak.sum(trig_muon_mask, axis=1) == 1) &
+                (ak.sum(muon_veto_mask, axis=1) == 1) &
+                (ak.sum(electron_veto_mask, axis=1) == 0) &
+                first_tau_matched
             )
-            is_iso = ak.sum(tau_iso_mask, axis=1) >= 1
-            # determine the os/ss charge sign relation
-            mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1)
-            tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1)
+
+            # get selected, sorted taus to obtain quantities
+            # (this will be correct for events for which is_mutau is actually True)
+            sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]]
+            # determine the relative charge and tau2 isolation
+            mu_charge = ak.firsts(events.Muon[trig_muon_mask].charge, axis=1)
+            tau_charge = ak.firsts(sorted_sel_taus.charge, axis=1)
             is_os = mu_charge == -tau_charge
+            is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 1
 
             # store global variables
             channel_id = update_channel_ids(events, channel_id, ch_mutau.id, is_mutau)
             tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated)
             leptons_os = ak.where(is_mutau, is_os, leptons_os)
             single_triggered = ak.where(is_mutau & is_single, True, single_triggered)
             cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered)
-            sel_muon_indices = ak.where(is_mutau, muon_indices, sel_muon_indices)
-            sel_tau_indices = ak.where(is_mutau, tau_indices, sel_tau_indices)
-            leading_taus = ak.where(is_mutau, events.Tau[tau_indices[:, :1]], leading_taus)
+            sel_muon_mask = ak.where(is_mutau, trig_muon_mask, sel_muon_mask)
+            sel_tau_mask = ak.where(is_mutau, trig_tau_mask, sel_tau_mask)
+            leading_taus = ak.where(is_mutau, sorted_sel_taus[:, :1], leading_taus)
 
-        elif trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and (
-            self.dataset_inst.is_mc or
-            self.dataset_inst.has_tag("tautau")
+        # tautau channel
+        if (
+            trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and
+            (self.dataset_inst.is_mc or self.dataset_inst.has_tag("tautau"))
         ):
+            # fold trigger
matching into the selection + trig_tau_mask = ( + tau_mask & + self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + + # check if the two leading (most isolated) taus are matched + leading_taus_matched = ak.fill_none( + ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1) & + ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]][:, 1:], axis=1), + False, + ) + # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated is_tautau = ( trigger_fired & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 2) & - (ak.sum(tau_iso_mask, axis=1) >= 1) + (ak.sum(electron_veto_mask, axis=1) == 0) & + (ak.sum(muon_veto_mask, axis=1) == 0) & + leading_taus_matched ) + + # TODO: not clear if this is required in the first place, discuss with cclub # special case for cross tau vbf trigger: # to avoid overlap, with non-vbf triggers, only one tau is allowed to have pt > 40 - if trigger.has_tag("cross_tau_tau_vbf"): - is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 2 - # tau_indices are sorted by highest isolation as cond. 1 and highest pt as cond. 2, so - # the first two indices are exactly those selected by the full-blown pairing algorithm - # and there is no need here to apply it again :) - # determine the os/ss charge sign relation - tau1_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - tau2_charge = ak.firsts(events.Tau[tau_indices].charge[..., 1:], axis=1) + # if trigger.has_tag("cross_tau_tau_vbf"): + # is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) + + # get selected, sorted taus to obtain quantities + # (this will be correct for events for which is_tautau is actually True) + sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] + # determine the relative charge and tau2 isolation + tau1_charge = ak.firsts(sorted_sel_taus.charge, axis=1) + tau2_charge = ak.firsts(sorted_sel_taus.charge[:, 1:], axis=1) is_os = tau1_charge == -tau2_charge + is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 2 # store global variables channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) leptons_os = ak.where(is_tautau, is_os, leptons_os) single_triggered = ak.where(is_tautau & is_single, True, single_triggered) cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) - sel_tau_indices = ak.where(is_tautau, tau_indices, sel_tau_indices) - leading_taus = ak.where(is_tautau, events.Tau[tau_indices[:, :2]], leading_taus) + sel_tau_mask = ak.where(is_tautau, trig_tau_mask, sel_tau_mask) + leading_taus = ak.where(is_tautau, sorted_sel_taus[:, :2], leading_taus) - # control regions - if trigger.has_tag({"single_mu"}) and ( + # ee channel + if trigger.has_tag("single_e") and ( + self.dataset_inst.is_mc or + self.dataset_inst.has_tag("ee") + ): + # fold trigger matching into the selection + trig_electron_mask = ( + electron_mask & + self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + + # check if the first (hardest) electron matched + electron_sorting_indices = ak.argsort(events.Electron.pt, axis=1, ascending=False) + leading_electron_matched = ak.fill_none( + ak.firsts(trig_electron_mask[electron_sorting_indices[electron_mask[electron_sorting_indices]]], axis=1), # noqa: E501 + 
False, + ) + + # expect 2 electrons, 2 veto electrons, 0 veto muons, and ignore the taus + is_ee = ( + trigger_fired & + (ak.sum(electron_mask, axis=1) == 2) & + leading_electron_matched & + (ak.sum(electron_veto_mask, axis=1) == 2) & + (ak.sum(muon_veto_mask, axis=1) == 0) + ) + + # get selected, sorted electrons to obtain quantities + # (this will be correct for events for which is_ee is actually True) + sorted_sel_electrons = events.Electron[electron_sorting_indices][electron_mask[electron_sorting_indices]] + # determine the relative charge + e1_charge = ak.firsts(sorted_sel_electrons.charge, axis=1) + e2_charge = ak.firsts(sorted_sel_electrons.charge[:, 1:], axis=1) + is_os = e1_charge == -e2_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_ee.id, is_ee) + leptons_os = ak.where(is_ee, is_os, leptons_os) + single_triggered = ak.where(is_ee & is_single, True, single_triggered) + cross_triggered = ak.where(is_ee & is_cross, True, cross_triggered) + sel_electron_mask = ak.where(is_ee, electron_mask, sel_electron_mask) + + # mumu channel + if trigger.has_tag("single_mu") and ( self.dataset_inst.is_mc or self.dataset_inst.has_tag("mumu") ): - first_muon_indices, second_muon_indices, muon_veto_indices = self[muon_selection]( - events, - trigger, - leg_masks, - mumu_selection=True, - **sel_kwargs, + # fold trigger matching into the selection + trig_muon_mask = ( + muon_mask & + self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) ) - mumu_muon_indices = ak.concatenate([first_muon_indices, second_muon_indices], axis=1) + # check if the first (hardest) muon matched + muon_sorting_indices = ak.argsort(events.Muon.pt, axis=1, ascending=False) + leading_muon_matched = ak.fill_none( + ak.firsts(trig_muon_mask[muon_sorting_indices[muon_mask[muon_sorting_indices]]], axis=1), + False, + ) # expect 2 muons, 2 veto muons, 0 veto electrons, and ignore the taus is_mumu = ( trigger_fired & - (ak.num(first_muon_indices, axis=1) == 1) & - (ak.num(second_muon_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 2) & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 0) # to remove? 
+ (ak.sum(muon_mask, axis=1) == 2) & + leading_muon_matched & + (ak.sum(muon_veto_mask, axis=1) == 2) & + (ak.sum(electron_veto_mask, axis=1) == 0) ) - # store necessary global variables - channel_id = update_channel_ids(events, channel_id, ch_mumu.id, is_mumu) - sel_muon_indices = ak.where(is_mumu, mumu_muon_indices, sel_muon_indices) - single_triggered = ak.where(is_mumu & is_single, True, single_triggered) - cross_triggered = ak.where(is_mumu & is_cross, True, cross_triggered) - # define fake iso regions for mumu, as there is not necessarily a tau to be isolated - # should be always false, as no tau are used - is_iso = ak.sum(tau_iso_mask, axis=1) < 0 - # determine the os/ss charge sign relation - mu1_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) - mu2_charge = ak.firsts(events.Muon[muon_indices].charge[..., 1:], axis=1) + # get selected, sorted muons to obtain quantities + # (this will be correct for events for which is_mumu is actually True) + sorted_sel_muons = events.Muon[muon_sorting_indices][muon_mask[muon_sorting_indices]] + # determine the relative charge + mu1_charge = ak.firsts(sorted_sel_muons.charge, axis=1) + mu2_charge = ak.firsts(sorted_sel_muons.charge[:, 1:], axis=1) is_os = mu1_charge == -mu2_charge # store global variables - tau2_isolated = ak.where(is_mumu, is_iso, tau2_isolated) + channel_id = update_channel_ids(events, channel_id, ch_mumu.id, is_mumu) leptons_os = ak.where(is_mumu, is_os, leptons_os) + single_triggered = ak.where(is_mumu & is_single, True, single_triggered) + cross_triggered = ak.where(is_mumu & is_cross, True, cross_triggered) + sel_muon_mask = ak.where(is_mumu, muon_mask, sel_muon_mask) # emu channel if ( - (trigger.has_tag({"single_e"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e"))) or - (trigger.has_tag({"single_mu"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu"))) + (emu_from_e := ( + trigger.has_tag("single_e") and + (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e")) + )) or ( + trigger.has_tag("single_mu") and + (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu")) + ) ): - # behavior for Single Muon dataset - if trigger.has_tag({"single_mu"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu")): - for trigger_emu, trigger_fired_emu, leg_masks_emu in trigger_results.x.trigger_data: - # verify that the single electron trigger is matched if the single electron trigger is fired - # if not, the matching is not verified in the selection. - # This is done by giving the electron selection the trigger_fired_mask - - # TODO: handle the case where there are several single e triggers (maybe not necessary?) - # as of now, only the last single electron trigger in the list of triggers applied to the dataset - # is used - if trigger_emu.has_tag("single_e"): - electron_indices, electron_veto_indices = self[electron_selection]( - events, - trigger_emu, - # TODO: this was leg_masks before, why? 
- leg_masks_emu, - trigger_fire_list=trigger_fired_emu, - **sel_kwargs, - ) - not_muon_in_e_trigger_fired = True - - # behavior for Single Electron dataset - elif trigger.has_tag({"single_e"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e")): - muon_indices, muon_veto_indices = self[muon_selection]( - events, - trigger, - leg_masks, - select_without_trigger=True, - **sel_kwargs, + if emu_from_e: + emu_electron_mask = electron_mask + # fold trigger matching into the selection + trig_electron_mask = ( + electron_mask & + self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) ) - muon_triggers_fire_list = [] - for trigger_emu, trigger_fired_emu, leg_masks_emu in trigger_results.x.trigger_data: - if trigger_emu.has_tag("single_mu"): - muon_triggers_fire_list += [trigger_fired_emu] - muon_trigger_fired = reduce( - or_, - muon_triggers_fire_list, + # for muons, loop over triggers, find single triggers and make sure none of them + # fired in order to avoid double counting + emu_muon_mask = False + mu_trig_fired = ak.full_like(events.event, False, dtype=bool) + for _trigger, _trigger_fired, _ in trigger_results.x.trigger_data: + if not _trigger.has_tag("single_mu"): + continue + # evaluate the muon selection once (it is the same for all single triggers) + if emu_muon_mask is False: + emu_muon_mask, _ = self[muon_selection](events, _trigger, **sel_kwargs) + # store the trigger decision + mu_trig_fired = mu_trig_fired | _trigger_fired + # muons obey the trigger rules if no single trigger fired + trig_muon_mask = emu_muon_mask & ~mu_trig_fired + + else: + emu_muon_mask = muon_mask + # fold trigger matching into the selection + trig_muon_mask = ( + muon_mask & + self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) ) - not_muon_in_e_trigger_fired = ~muon_trigger_fired + # for electrons, loop over triggers, find single triggers and check the matching + # only in case a trigger fired + emu_electron_mask = False + e_trig_fired = ak.full_like(events.event, False, dtype=bool) + e_match_mask = ak.full_like(events.Electron.pt, False, dtype=bool) + for _trigger, _trigger_fired, _leg_masks in trigger_results.x.trigger_data: + if not _trigger.has_tag("single_e"): + continue + # evaluate the electron selection once (it is the same for all single triggers) + if emu_electron_mask is False: + emu_electron_mask, _ = self[electron_selection](events, _trigger, **sel_kwargs) + # store the trigger decision + e_trig_fired = e_trig_fired | _trigger_fired + # evaluate the matching + e_match_mask = e_match_mask | ( + self[electron_trigger_matching](events, _trigger, _leg_masks, **sel_kwargs) & + _trigger_fired + ) + # for events in which no single e trigger fired, consider the matching as successful + e_match_mask = e_match_mask | ~e_trig_fired + trig_electron_mask = emu_electron_mask & e_match_mask - # general emu channel selection # expect 1 electron, 1 muon, 1 veto electron, 1 veto muon, and ignore taus is_emu = ( - trigger_fired & not_muon_in_e_trigger_fired & - (ak.num(electron_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 1) & - (ak.num(muon_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 1) & - (ak.num(tau_indices, axis=1) >= 0) # to remove? 
+ trigger_fired & + (ak.sum(emu_electron_mask, axis=1) == 1) & + (ak.sum(trig_electron_mask, axis=1) == 1) & + (ak.sum(electron_veto_mask, axis=1) == 1) & + (ak.sum(emu_muon_mask, axis=1) == 1) & + (ak.sum(trig_muon_mask, axis=1) == 1) & + (ak.sum(muon_veto_mask, axis=1) == 1) ) - # store necessary global variables + # determine the relative charge + e_charge = ak.firsts(events.Electron[trig_electron_mask].charge, axis=1) + mu_charge = ak.firsts(events.Muon[trig_muon_mask].charge, axis=1) + is_os = e_charge == -mu_charge + # store global variables channel_id = update_channel_ids(events, channel_id, ch_emu.id, is_emu) - sel_electron_indices = ak.where(is_emu, electron_indices, sel_electron_indices) - sel_muon_indices = ak.where(is_emu, muon_indices, sel_muon_indices) + leptons_os = ak.where(is_emu, is_os, leptons_os) single_triggered = ak.where(is_emu & is_single, True, single_triggered) cross_triggered = ak.where(is_emu & is_cross, True, cross_triggered) - - # define fake iso regions for emu, as there is not necessarily a tau to be isolated - # should be always false, as no tau are used - is_iso = ak.sum(tau_iso_mask, axis=1) < 0 - # determine the os/ss charge sign relation - mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) - e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) - is_os = mu_charge == -e_charge - # store global variables - tau2_isolated = ak.where(is_emu, is_iso, tau2_isolated) - leptons_os = ak.where(is_emu, is_os, leptons_os) - # print("number events in emu channel", ak.sum(is_emu)) + sel_electron_mask = ak.where(is_emu, trig_electron_mask, sel_electron_mask) + sel_muon_mask = ak.where(is_emu, trig_muon_mask, sel_muon_mask) # some final type conversions channel_id = ak.values_astype(channel_id, np.uint8) leptons_os = ak.fill_none(leptons_os, False) - sel_electron_indices = ak.values_astype(sel_electron_indices, np.int32) - sel_muon_indices = ak.values_astype(sel_muon_indices, np.int32) - sel_tau_indices = ak.values_astype(sel_tau_indices, np.int32) # save new columns events = set_ak_column(events, "channel_id", channel_id) @@ -737,6 +787,11 @@ def lepton_selection( events = set_ak_column(events, "single_triggered", single_triggered) events = set_ak_column(events, "cross_triggered", cross_triggered) + # convert lepton masks to sorted indices (pt for e/mu, iso for tau) + sel_electron_indices = sorted_indices_from_mask(sel_electron_mask, events.Electron.pt, ascending=False) + sel_muon_indices = sorted_indices_from_mask(sel_muon_mask, events.Muon.pt, ascending=False) + sel_tau_indices = sorted_indices_from_mask(sel_tau_mask, tau_sorting_key, ascending=False) + return events, SelectionResult( steps={ "lepton": channel_id != 0, @@ -769,3 +824,9 @@ def lepton_selection( "leading_taus": leading_taus, }, ) + + +@lepton_selection.init +def lepton_selection_init(self: Selector) -> None: + # add column to load the raw tau tagger score + self.uses.add(f"Tau.raw{self.config_inst.x.tau_tagger}VSjet") From 73d231ca990af7d507dded2323eee7f2bb6cd920 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 8 Jan 2025 19:53:14 +0100 Subject: [PATCH 04/20] Improve trigger leg matching. 
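Matching is now computed only for events in which the given trigger actually fired, and the
per-object decisions are scattered back into a mask covering all events, with objects in
unselected events remaining False. A minimal sketch of the scatter-back pattern, using plain
awkward/numpy in place of the full_like/flat_np_view helpers and made-up values:

    import awkward as ak
    import numpy as np

    pt = ak.Array([[10.0, 20.0], [], [30.0, 40.0]])  # per-object values
    event_mask = ak.Array([True, False, False])      # per-event trigger decision

    # run the per-object computation only for selected events
    partial = pt[event_mask] > 15.0                  # [[False, True]]

    # scatter the partial result back into a flat, full-length object mask
    flat_full = np.zeros(int(ak.sum(ak.num(pt))), dtype=bool)
    in_sel_event = ak.to_numpy(ak.flatten(ak.broadcast_arrays(event_mask, pt)[0]))
    flat_full[in_sel_event] = ak.to_numpy(ak.flatten(partial))
    result = ak.unflatten(flat_full, ak.num(pt))     # [[False, True], [], [False, False]]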
--- hbt/selection/jet.py | 9 ++-- hbt/selection/lepton.py | 98 ++++++++++++++++++++++++++++------------- modules/columnflow | 2 +- 3 files changed, 74 insertions(+), 35 deletions(-) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index a0afcca8..18f09161 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -11,6 +11,7 @@ from columnflow.selection.cms.jets import jet_veto_map from columnflow.columnar_util import ( EMPTY_FLOAT, set_ak_column, sorted_indices_from_mask, mask_from_indices, flat_np_view, + full_like, ) from columnflow.util import maybe_import, InsertableDict @@ -185,7 +186,7 @@ def jet_selection( # should be done and should therefore be ignored. # create a mask to select tautau events that were only triggered by a tau-tau-jet cross trigger - false_mask = ak.full_like(events.event, False, dtype=bool) + false_mask = full_like(events.event, False, dtype=bool) ttj_mask = ( (events.channel_id == 3) & ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttc], false_mask), axis=1) & @@ -195,7 +196,7 @@ def jet_selection( # only perform this special treatment when applicable if ak.any(ttj_mask): # check which jets can be matched to any of the jet legs - matching_mask = ak.full_like(events.Jet.pt[ttj_mask], False, dtype=bool) + matching_mask = full_like(events.Jet.pt[ttj_mask], False, dtype=bool) for trigger, _, leg_masks in trigger_results.x.trigger_data: if trigger.id in self.trigger_ids_ttjc: trig_objs = events.TrigObj[leg_masks["jet"]] @@ -246,7 +247,7 @@ def jet_selection( # insert back into the full hhbjet_mask flat_hhbjet_mask = flat_np_view(hhbjet_mask) - flat_jet_mask = ak.flatten(ak.full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) + flat_jet_mask = ak.flatten(full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) # validate that either none or two hhbjets were identified @@ -306,7 +307,7 @@ def jet_selection( # extra requirements for events for which only the tau tau vbf cross trigger fired cross_vbf_ids = [t.id for t in self.config_inst.x.triggers if t.has_tag("cross_tau_tau_vbf")] if not cross_vbf_ids: - cross_vbf_mask = ak.full_like(1 * events.event, False, dtype=bool) + cross_vbf_mask = full_like(1 * events.event, False, dtype=bool) else: cross_vbf_masks = [events.trigger_ids == tid for tid in cross_vbf_ids] # This combines "at least one cross trigger is fired" and "no other triggers are fired" diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index 6c04ec7b..899db374 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -12,7 +12,9 @@ from functools import reduce from columnflow.selection import Selector, SelectionResult, selector -from columnflow.columnar_util import set_ak_column, sorted_indices_from_mask +from columnflow.columnar_util import ( + set_ak_column, sorted_indices_from_mask, flat_np_view, full_like, +) from columnflow.util import maybe_import from hbt.util import IF_NANO_V9, IF_NANO_GE_V10 @@ -28,21 +30,36 @@ def trigger_object_matching( vectors1: ak.Array, vectors2: ak.Array, + /, + *, threshold: float = 0.5, axis: int = 2, + event_mask: ak.Array | type(Ellipsis) | None = None, ) -> ak.Array: """ Helper to check per object in *vectors1* if there is at least one object in *vectors2* that leads to a delta R metric below *threshold*. The final reduction is applied over *axis* of the - resulting metric table containing the full combinatorics. 
When *return_all_matches* is *True*, - the matrix with all matching decisions is returned as well. + resulting metric table containing the full combinatorics. If an *event_mask* is given, the + the matching is performed only for those events, but a full object mask with the same shape as + that of *vectors1* is returned, which all objects set to *False* where not matching was done. """ + # handle event masks + used_event_mask = event_mask is not None and event_mask is not Ellipsis + event_mask = Ellipsis if event_mask is None else event_mask + # delta_r for all combinations - dr = vectors1.metric_table(vectors2) + dr = vectors1[event_mask].metric_table(vectors2[event_mask]) # check per element in vectors1 if there is at least one matching element in vectors2 any_match = ak.any(dr < threshold, axis=axis) + # expand to original shape if an event mask was given + if used_event_mask: + full_any_match = full_like(vectors1.pt, False, dtype=bool) + flat_full_any_match = flat_np_view(full_any_match) + flat_full_any_match[flat_np_view(full_any_match | event_mask)] = flat_np_view(any_match) + any_match = full_any_match + return any_match @@ -134,13 +151,13 @@ def electron_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, + trigger_fired: ak.Array, leg_masks: dict[str, ak.Array], **kwargs, ) -> tuple[ak.Array]: """ Electron trigger matching. """ - # TODO: only perform this for events where the trigger fired, needs more info and flat_views is_single = trigger.has_tag("single_e") is_cross = trigger.has_tag("cross_e_tau") @@ -149,7 +166,11 @@ def electron_trigger_matching( assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) assert abs(trigger.legs["e"].pdg_id) == 11 - return trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) + return trigger_object_matching( + events.Electron, + events.TrigObj[leg_masks["e"]], + event_mask=trigger_fired, + ) @selector( @@ -211,13 +232,13 @@ def muon_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, + trigger_fired: ak.Array, leg_masks: dict[str, ak.Array], **kwargs, ) -> tuple[ak.Array]: """ Muon trigger matching. """ - # TODO: only perform this for events where the trigger fired, needs more info and flat_views is_single = trigger.has_tag("single_mu") is_cross = trigger.has_tag("cross_mu_tau") @@ -226,7 +247,11 @@ def muon_trigger_matching( assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) assert abs(trigger.legs["mu"].pdg_id) == 13 - return trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) + return trigger_object_matching( + events.Muon, + events.TrigObj[leg_masks["mu"]], + event_mask=trigger_fired, + ) @selector( @@ -253,7 +278,7 @@ def tau_selection( # return empty mask if no tagged taus exists in the chunk if ak.all(ak.num(events.Tau) == 0): logger.info("no taus found in event chunk") - false_mask = ak.full_like(events.Tau.pt, False, dtype=bool) + false_mask = full_like(events.Tau.pt, False, dtype=bool) return false_mask, false_mask is_single_e = trigger.has_tag("single_e") @@ -317,16 +342,16 @@ def tau_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, + trigger_fired: ak.Array, leg_masks: dict[str, ak.Array], **kwargs, ) -> tuple[ak.Array]: """ Tau trigger matching. 
""" - # TODO: only perform this for events where the trigger fired, needs more info and flat_views if ak.all(ak.num(events.Tau) == 0): logger.info("no taus found in event chunk") - return ak.full_like(events.Tau.pt, False, dtype=bool) + return full_like(events.Tau.pt, False, dtype=bool) is_cross_e = trigger.has_tag("cross_e_tau") is_cross_mu = trigger.has_tag("cross_mu_tau") @@ -342,7 +367,11 @@ def tau_trigger_matching( assert trigger.n_legs == len(leg_masks) == 2 assert abs(trigger.legs["tau"].pdg_id) == 15 # match leg 1 - return trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau"]]) + return trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau"]], + event_mask=trigger_fired, + ) # is_any_cross_tau # catch config errors @@ -351,8 +380,17 @@ def tau_trigger_matching( assert abs(trigger.legs["tau2"].pdg_id) == 15 # match both legs - matches_leg0 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau1"]]) - matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau2"]]) + matches_leg0 = trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau1"]], + event_mask=trigger_fired, + ) + matches_leg1 = trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau2"]], + event_mask=trigger_fired, + ) + # taus need to be matched to at least one leg, but as a side condition # each leg has to have at least one match to a tau matches = ( @@ -417,9 +455,9 @@ def lepton_selection( leptons_os = false_mask single_triggered = false_mask cross_triggered = false_mask - sel_electron_mask = ak.full_like(events.Electron.pt, False, dtype=bool) - sel_muon_mask = ak.full_like(events.Muon.pt, False, dtype=bool) - sel_tau_mask = ak.full_like(events.Tau.pt, False, dtype=bool) + sel_electron_mask = full_like(events.Electron.pt, False, dtype=bool) + sel_muon_mask = full_like(events.Muon.pt, False, dtype=bool) + sel_tau_mask = full_like(events.Tau.pt, False, dtype=bool) leading_taus = events.Tau[:, :0] # indices for sorting taus first by isolation, then by pt @@ -465,13 +503,13 @@ def lepton_selection( # fold trigger matching into the selection trig_electron_mask = ( electron_mask & - self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) trig_tau_mask = tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & - self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the most isolated tau among the selected ones is matched @@ -517,13 +555,13 @@ def lepton_selection( # fold trigger matching into the selection trig_muon_mask = ( muon_mask & - self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) trig_tau_mask = tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & - self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the most isolated tau among the selected ones is matched @@ -569,7 +607,7 @@ def lepton_selection( # fold trigger matching into the selection trig_tau_mask = ( tau_mask & - self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if 
the two leading (most isolated) taus are matched @@ -618,7 +656,7 @@ def lepton_selection( # fold trigger matching into the selection trig_electron_mask = ( electron_mask & - self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the first (hardest) electron matched @@ -659,7 +697,7 @@ def lepton_selection( # fold trigger matching into the selection trig_muon_mask = ( muon_mask & - self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the first (hardest) muon matched @@ -707,12 +745,12 @@ def lepton_selection( # fold trigger matching into the selection trig_electron_mask = ( electron_mask & - self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # for muons, loop over triggers, find single triggers and make sure none of them # fired in order to avoid double counting emu_muon_mask = False - mu_trig_fired = ak.full_like(events.event, False, dtype=bool) + mu_trig_fired = full_like(events.event, False, dtype=bool) for _trigger, _trigger_fired, _ in trigger_results.x.trigger_data: if not _trigger.has_tag("single_mu"): continue @@ -729,13 +767,13 @@ def lepton_selection( # fold trigger matching into the selection trig_muon_mask = ( muon_mask & - self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # for electrons, loop over triggers, find single triggers and check the matching # only in case a trigger fired emu_electron_mask = False - e_trig_fired = ak.full_like(events.event, False, dtype=bool) - e_match_mask = ak.full_like(events.Electron.pt, False, dtype=bool) + e_trig_fired = full_like(events.event, False, dtype=bool) + e_match_mask = full_like(events.Electron.pt, False, dtype=bool) for _trigger, _trigger_fired, _leg_masks in trigger_results.x.trigger_data: if not _trigger.has_tag("single_e"): continue @@ -746,7 +784,7 @@ def lepton_selection( e_trig_fired = e_trig_fired | _trigger_fired # evaluate the matching e_match_mask = e_match_mask | ( - self[electron_trigger_matching](events, _trigger, _leg_masks, **sel_kwargs) & + self[electron_trigger_matching](events, _trigger, _trigger_fired, _leg_masks, **sel_kwargs) & _trigger_fired ) # for events in which no single e trigger fired, consider the matching as successful diff --git a/modules/columnflow b/modules/columnflow index c58104f5..154e7b0b 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit c58104f593588a6c5393b9ed19c0604f431a7078 +Subproject commit 154e7b0bbf0ff2a1e2c3263923cf42c5f3e48da1 From 02c15879657e2dd743304357dfabb5063aece098 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 08:46:00 +0100 Subject: [PATCH 05/20] Update cf. --- modules/columnflow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/columnflow b/modules/columnflow index 154e7b0b..701b8b36 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 154e7b0bbf0ff2a1e2c3263923cf42c5f3e48da1 +Subproject commit 701b8b36b8b962c16c643397a7388a399dbebe13 From cd179d148069ea81df946aa4a6370dbc974f857b Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 11:20:07 +0100 Subject: [PATCH 06/20] Fix sync_mode. 
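In sync mode, the campaign only carries the handful of datasets used for the synchronization
exercise, so the dedicated sync entries are dropped from the dataset list, datasets unknown to
the campaign are skipped, and the process verification is bypassed. Condensed, the new guard
amounts to the following sketch (sync_mode, campaign and cfg are the surrounding config-factory
names):

    for dataset_name in dataset_names:
        # in sync mode, only datasets known to the reduced campaign survive
        if sync_mode and not campaign.has_dataset(dataset_name):
            continue
        dataset = cfg.add_dataset(campaign.get_dataset(dataset_name))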
---
 hbt/config/configs_hbt.py | 14 +++++++-------
 modules/cmsdb             |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
index fadd094b..06581b14 100644
--- a/hbt/config/configs_hbt.py
+++ b/hbt/config/configs_hbt.py
@@ -330,13 +330,12 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
         *if_era(year=2023, tag="postBPix", values=[
             f"data_{stream}_d{v}" for stream in ["mu", "e", "tau", "met"] for v in "12"
         ]),
-
-        # sync
-        *if_era(year=[2022, 2023], sync=True, values=[
-            "hh_ggf_hbb_htt_kl1_kt1_powheg",
-        ]),
     ]
     for dataset_name in dataset_names:
+        # skip when in sync mode and not existing
+        if sync_mode and not campaign.has_dataset(dataset_name):
+            continue
+
         # add the dataset
         dataset = cfg.add_dataset(campaign.get_dataset(dataset_name))
 
@@ -353,7 +352,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
             dataset.add_tag({"has_top", "single_top", "st"})
         if dataset.name.startswith("dy_"):
             dataset.add_tag("dy")
-        if re.match(r"^(ww|wz|zz)_.*pythia$", dataset.name):
+        if re.match(r"^(ww|wz|zz)_.*_pythia$", dataset.name):
             dataset.add_tag("no_lhe_weights")
         if dataset_name.startswith("hh_"):
             dataset.add_tag("signal")
@@ -388,7 +387,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
                 info.n_files = 1
 
     # verify that the root process of each dataset is part of any of the registered processes
-    verify_config_processes(cfg, warn=True)
+    if not sync_mode:
+        verify_config_processes(cfg, warn=True)
 
     ################################################################################################
     # task defaults and groups
diff --git a/modules/cmsdb b/modules/cmsdb
index 54953603..ccac9ae6 160000
--- a/modules/cmsdb
+++ b/modules/cmsdb
@@ -1 +1 @@
-Subproject commit 549536034fdd0b20efbddc1adf4403a3713161ab
+Subproject commit ccac9ae6e3d7f761e3dcb8bc7109712d797c33f6

From b52f7acbd6ad58ac84c4e127d8c241897985b4d2 Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Thu, 9 Jan 2025 16:52:45 +0100
Subject: [PATCH 07/20] Update cf.
--- hbt/selection/jet.py | 85 -------------------------------------------- modules/columnflow | 2 +- 2 files changed, 1 insertion(+), 86 deletions(-) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index 18f09161..9478ff4e 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -86,91 +86,6 @@ def jet_selection( # hhb-jet identification # - # # get the hhbtag values per jet per event - # hhbtag_scores = self[hhbtag](events, default_mask, lepton_results.x.lepton_pair, **kwargs) - - # # get the score indices of each hhbtag value - # score_indices = ak.argsort(hhbtag_scores, axis=1, ascending=False) - - # # only consider tautau events for which the tau_tau_jet trigger fired and no other tau tau trigger - # trigger_mask = ( - # (events.channel_id == 3) & - # ~ak.any((events.trigger_ids == 505) | (events.trigger_ids == 603), axis=1) & - # ak.any(events.trigger_ids == 701, axis=1) - # ) - - # # score (and local index) of the hhbtag for each jet coming from events which passed the trigger mask - # sel_score_indices = score_indices[trigger_mask] - # sel_score_indices_li = ak.local_index(sel_score_indices, axis=1) - - # # ids of the objects which fired the jet leg of the cross_tau_tau_jet trigger - # for trigger, trigger_fired, leg_masks in trigger_results.x.trigger_data: - # if trigger.has_tag("cross_tau_tau_jet"): - # # Zip the trigger legs with their corresponding masks - # for leg, mask in zip(trigger.legs, leg_masks): - # if leg.pdg_id == 1: - # obj_ids = mask[trigger_mask] - - # # trigger objects corresponding to the above ids - # sel_trig_objs = events.TrigObj[trigger_mask][obj_ids] - - # # mask that checks wheather or not the selected hhbjets *matches* the trigger object with delta R < 0.5 - # matching_mask = trigger_object_matching(events[trigger_mask].Jet[sel_score_indices], sel_trig_objs) - - # # index of the highest scored hhbjet *matching* the trigger object - # sel_hhbjet_idx = ak.argmax(matching_mask, axis=1) - - # # create mask to remove all jets except the highest scored hhbjet *matching* the trigger object - # unselected_hhbjet_idx = sel_score_indices_li != sel_hhbjet_idx - - # # hhbtag score of the highest scored hhbjet *matching* the trigger object - # first_hhbjet_idx = sel_score_indices[~unselected_hhbjet_idx] - - # # select the highest scored hhbjet of the remaining *matched and unmatched* hhbjets - # remaining_hhbjets_score_indices = sel_score_indices[unselected_hhbjet_idx] - # second_hhbjet_idx = ak.singletons((ak.firsts(remaining_hhbjets_score_indices))) - - # # concatenate both selected hhbjet indices; 1st index is *matched*; 2nd can be *matched* or *unmatched* - # sel_hhbjets_score_indices = ak.concatenate([first_hhbjet_idx, second_hhbjet_idx], axis=1) - # # sort the selected score indices (now the *matched* hhbjet can be in either position) - # sorted_sel_hhbjets_score_indices = ak.sort(sel_hhbjets_score_indices, axis=1) - - # # when only 1 hhbjet was found fill the second index with None - # sel_hhbjets_score_indices = ak.pad_none(sel_hhbjets_score_indices, 2, axis=1) - # sorted_sel_hhbjets_score_indices = ak.pad_none(sorted_sel_hhbjets_score_indices, 2, axis=1) - - # # all event indices - # event_idx = ak.local_index(score_indices, axis=0) - # # indices of events passing the trigger mask - # sel_event_idx = event_idx[trigger_mask] - - # # store the selected hhbjet score indices in the corresponding event indices - # temp_mask = (event_idx[:, None] == sel_event_idx) - # pos_mask = ak.any(temp_mask, axis=1) - # match_index = ak.argmax(temp_mask, axis=1) 
- - # # initializing a None array - # none = ak.Array([[None, None]]) - # none_expanded = ak.broadcast_arrays(none, event_idx)[0] - - # # save selected hhbjet scores for the selected events and save [None, None] to the remaining events - # padded_hhbjet_indices = ak.where(pos_mask, sel_hhbjets_score_indices[match_index], none_expanded) - # hhbjet_mask = ((li == padded_hhbjet_indices[..., [0]]) | (li == padded_hhbjet_indices[..., [1]])) - # # -------------------------------------------------------------------------------------------- - - # # get indices for actual book keeping only for events with both lepton candidates and where at - # # least two jets pass the default mask (bjet candidates) - # valid_score_mask = ( - # default_mask & - # (ak.sum(default_mask, axis=1) >= 2) & - # (ak.num(lepton_results.x.lepton_pair, axis=1) == 2) - # ) - # hhbjet_indices = score_indices[valid_score_mask[score_indices]][..., :2] - - # - # compressed version, to be checked - # - # get the hhbtag values per jet per event hhbtag_scores = self[hhbtag](events, default_mask, lepton_results.x.lepton_pair, **kwargs) diff --git a/modules/columnflow b/modules/columnflow index 701b8b36..52207354 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 701b8b36b8b962c16c643397a7388a399dbebe13 +Subproject commit 522073541c68d16938daa1bd1b137664dfc6d332 From 9adc9368d7ca02fe3ed84d7f3b3cbbb326b0d2ac Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 17:02:34 +0100 Subject: [PATCH 08/20] Fix name in sync. --- hbt/tasks/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbt/tasks/sync.py b/hbt/tasks/sync.py index 26902842..dfe1b6c4 100644 --- a/hbt/tasks/sync.py +++ b/hbt/tasks/sync.py @@ -289,7 +289,7 @@ def select_leptons(events: ak.Array, common_fields: dict[str, int | float]) -> a "run": events.run, "lumi": events.luminosityBlock, # high-level events variables - "channel": events.channel_id, + "channel_id": events.channel_id, "os": events.leptons_os * 1, "iso": events.tau2_isolated * 1, "deterministic_seed": to_str(events.deterministic_seed), From a5913c9869919c1bb0b9030799685ffd2898872e Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 17:57:38 +0100 Subject: [PATCH 09/20] Make deeptau cuts vs e and mu channel-dependent. --- hbt/selection/lepton.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index 899db374..485e35c7 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -316,9 +316,8 @@ def tau_selection( (events.Tau.pt > min_pt) & (abs(events.Tau.dz) < 0.2) & reduce(or_, [events.Tau.decayMode == mode for mode in (0, 1, 10, 11)]) & - (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & - (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) & (events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.vvvloose) + # vs e and mu cuts are channel dependent and thus applied in the overall lepton selection ) # remove taus with too close spatial separation to previously selected leptons @@ -440,6 +439,9 @@ def lepton_selection( """ Combined lepton selection. 
""" + wp_config = self.config_inst.x.tau_id_working_points + get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" + # get channels from the config ch_etau = self.config_inst.get_channel("etau") ch_mutau = self.config_inst.get_channel("mutau") @@ -500,12 +502,19 @@ def lepton_selection( self.dataset_inst.is_mc or self.dataset_inst.has_tag("etau") ): + # channel dependent deeptau cuts vs e and mu + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + # fold trigger matching into the selection trig_electron_mask = ( electron_mask & self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) - trig_tau_mask = tau_mask + trig_tau_mask = ch_tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & @@ -514,7 +523,7 @@ def lepton_selection( # check if the most isolated tau among the selected ones is matched first_tau_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1), + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1), False, ) @@ -552,12 +561,19 @@ def lepton_selection( self.dataset_inst.is_mc or self.dataset_inst.has_tag("mutau") ): + # channel dependent deeptau cuts vs e and mu + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + # fold trigger matching into the selection trig_muon_mask = ( muon_mask & self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) - trig_tau_mask = tau_mask + trig_tau_mask = ch_tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & @@ -566,7 +582,7 @@ def lepton_selection( # check if the most isolated tau among the selected ones is matched first_tau_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1), + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1), False, ) @@ -604,16 +620,23 @@ def lepton_selection( trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("tautau")) ): + # channel dependent deeptau cuts vs e and mu + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.vloose) + ) + # fold trigger matching into the selection trig_tau_mask = ( - tau_mask & + ch_tau_mask & self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the two leading (most isolated) taus are matched leading_taus_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1) & - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]][:, 1:], axis=1), + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1) & + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]][:, 1:], axis=1), False, ) From a7fb9bcdde915b57cc71cf9047a10ec5beb34064 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 18:06:48 +0100 Subject: [PATCH 10/20] Move jet_veto_map selection from jet to overall selection. 
--- hbt/selection/default.py | 14 ++++++++++---- hbt/selection/jet.py | 10 ++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hbt/selection/default.py b/hbt/selection/default.py index 8cd35719..b0957136 100644 --- a/hbt/selection/default.py +++ b/hbt/selection/default.py @@ -14,6 +14,7 @@ from columnflow.selection.stats import increment_stats from columnflow.selection.cms.json_filter import json_filter from columnflow.selection.cms.met_filters import met_filters +from columnflow.selection.cms.jets import jet_veto_map from columnflow.production.processes import process_ids from columnflow.production.cms.mc_weight import mc_weight from columnflow.production.cms.pileup import pu_weight @@ -53,10 +54,10 @@ def get_met_filters(self: Selector) -> Iterable[str]: @selector( uses={ - json_filter, hbt_met_filters, trigger_selection, lepton_selection, jet_selection, mc_weight, - pu_weight, btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), process_ids, cutflow_features, - increment_stats, attach_coffea_behavior, patch_ecalBadCalibFilter, - IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), + json_filter, hbt_met_filters, IF_RUN_3(jet_veto_map), trigger_selection, lepton_selection, + jet_selection, mc_weight, pu_weight, btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), + process_ids, cutflow_features, increment_stats, attach_coffea_behavior, + patch_ecalBadCalibFilter, IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), }, produces={ trigger_selection, lepton_selection, jet_selection, mc_weight, pu_weight, @@ -96,6 +97,11 @@ def default( ) results += met_filter_results + # jet veto map + if self.has_dep(jet_veto_map): + events, veto_result = self[jet_veto_map](events, **kwargs) + results += veto_result + # trigger selection events, trigger_results = self[trigger_selection](events, **kwargs) results += trigger_results diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index 9478ff4e..5d91c159 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -8,14 +8,13 @@ from functools import reduce from columnflow.selection import Selector, SelectionResult, selector -from columnflow.selection.cms.jets import jet_veto_map from columnflow.columnar_util import ( EMPTY_FLOAT, set_ak_column, sorted_indices_from_mask, mask_from_indices, flat_np_view, full_like, ) from columnflow.util import maybe_import, InsertableDict -from hbt.util import IF_RUN_2, IF_RUN_3 +from hbt.util import IF_RUN_2 from hbt.production.hhbtag import hhbtag from hbt.selection.lepton import trigger_object_matching @@ -25,7 +24,7 @@ @selector( uses={ - hhbtag, IF_RUN_3(jet_veto_map), + hhbtag, "trigger_ids", "TrigObj.{pt,eta,phi}", "Jet.{pt,eta,phi,mass,jetId}", IF_RUN_2("Jet.puId"), "FatJet.{pt,eta,phi,mass,msoftdrop,jetId,subJetIdx1,subJetIdx2}", @@ -314,11 +313,6 @@ def jet_selection( }, ) - # additional jet veto map, vetoing entire events - if self.has_dep(jet_veto_map): - events, veto_result = self[jet_veto_map](events, **kwargs) - result += veto_result - return events, result From 9f7254138b8d66cffa4dc355ea2216d2e874e09d Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 18:07:35 +0100 Subject: [PATCH 11/20] Update cmsdb ref. 
--- modules/cmsdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cmsdb b/modules/cmsdb index ccac9ae6..f2b84f7c 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit ccac9ae6e3d7f761e3dcb8bc7109712d797c33f6 +Subproject commit f2b84f7cceaea02a3bc5270436369da6562ecd68 From b2a23e8079fdec8c1b85742aa7ec13306bfdc99c Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Fri, 10 Jan 2025 08:31:43 +0100 Subject: [PATCH 12/20] Clean variables and categories. --- hbt/config/categories.py | 16 +++++++++++++--- hbt/config/variables.py | 12 +++++------- hbt/production/weights.py | 6 ++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/hbt/config/categories.py b/hbt/config/categories.py index b86371f0..7f69a7bf 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -71,7 +71,12 @@ def kwargs_fn(categories, add_qcd_group=True): "sign": [config.get_category("os"), config.get_category("ss")], "tau2": [config.get_category("iso"), config.get_category("noniso")], } - create_category_combinations(config, main_categories, name_fn, kwargs_fn) + create_category_combinations( + config=config, + categories=main_categories, + name_fn=name_fn, + kwargs_fn=functools.partial(kwargs_fn, add_qcd_group=True), + ) # control categories control_categories = { @@ -82,6 +87,11 @@ def kwargs_fn(categories, add_qcd_group=True): # kinematic regions in the middle (to be extended) "kin": [config.get_category("incl"), config.get_category("2j")], # relative sign last - "sign": [config.get_category("os"), config.get_category("ss")], + "sign": [config.get_category("os")], } - create_category_combinations(config, control_categories, name_fn, functools.partial(kwargs_fn, add_qcd_group=False)) + create_category_combinations( + config=config, + categories=control_categories, + name_fn=name_fn, + kwargs_fn=functools.partial(kwargs_fn, add_qcd_group=False), + ) diff --git a/hbt/config/variables.py b/hbt/config/variables.py index 58da3b01..d3ec4994 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -87,26 +87,24 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="met_phi", - expression="MET.phi", + expression="PuppiMET.phi", null_value=EMPTY_FLOAT, binning=(33, -3.3, 3.3), x_title=r"MET $\phi$", ) - config.add_variable( - name="e_pt", - expression="Electron.pt", + name="e1_pt", + expression="Electron.pt[:, 0]", null_value=EMPTY_FLOAT, binning=(40, 0, 400), - x_title=r"Electron p$_{T}$", + x_title=r"Leading electron p$_{T}$", ) - config.add_variable( name="mu1_pt", expression="Muon.pt[:,0]", null_value=EMPTY_FLOAT, binning=(40, 0, 400), - x_title=r"Muon 1 p$_{T}$", + x_title=r"Leading muon p$_{T}$", ) # weights diff --git a/hbt/production/weights.py b/hbt/production/weights.py index f4777e74..716cbc96 100644 --- a/hbt/production/weights.py +++ b/hbt/production/weights.py @@ -6,6 +6,7 @@ from columnflow.production import Producer, producer from columnflow.production.cms.pileup import pu_weight +from columnflow.production.cms.pdf import pdf_weights from columnflow.util import maybe_import, safe_div, InsertableDict from columnflow.columnar_util import set_ak_column @@ -153,6 +154,11 @@ def normalized_pdf_weight_setup( } +# variation of the pdf weights producer that does not store up and down shifted weights +# but that stores all available pdf weights for the full treatment based on histograms +all_pdf_weights = pdf_weights.derive("all_pdf_weights", cls_dict={"store_all_weights": True}) + + @producer( 
uses={"murmuf_weight{,_up,_down}"}, produces={"normalized_murmuf_weight{,_up,_down}"}, From 3895d976662dad85441c39411e319875a1fd7fdc Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Mon, 13 Jan 2025 09:28:16 +0100 Subject: [PATCH 13/20] Simplify trigger bits, adapt to cf. --- hbt/config/configs_hbt.py | 42 +++++++++---- hbt/config/triggers.py | 120 +++++++++++++++++--------------------- hbt/config/util.py | 44 +++++++++++++- hbt/production/btag.py | 13 +++-- hbt/production/weights.py | 24 +++----- hbt/util.py | 2 +- law.cfg | 4 +- modules/cmsdb | 2 +- modules/columnflow | 2 +- 9 files changed, 148 insertions(+), 105 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 06581b14..73b70e73 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -7,7 +7,6 @@ from __future__ import annotations import os -import re import itertools import functools @@ -167,6 +166,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] proc.add_tag({"ttbar", "tt"}) if process_name.startswith("dy_"): proc.add_tag("dy") + if process_name.startswith("w_lnu_"): + proc.add_tag("w_lnu") # add the process cfg.add_process(proc) @@ -293,10 +294,10 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] "ww_pythia", # vvv - "zzz_amcatnlo", - "wzz_amcatnlo", - "wwz_4f_amcatnlo", "www_4f_amcatnlo", + "wwz_4f_amcatnlo", + "wzz_amcatnlo", + "zzz_amcatnlo", # single H "h_ggf_htt_powheg", @@ -319,16 +320,16 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # data *if_era(year=2022, tag="preEE", values=[ - f"data_{stream}_{period}" for stream in ["mu", "e", "tau", "met"] for period in "cd" + f"data_{stream}_{period}" for stream in ["mu", "e", "tau"] for period in "cd" ]), *if_era(year=2022, tag="postEE", values=[ - f"data_{stream}_{period}" for stream in ["mu", "e", "tau", "met"] for period in "efg" + f"data_{stream}_{period}" for stream in ["mu", "e", "tau"] for period in "efg" ]), *if_era(year=2023, tag="preBPix", values=[ - f"data_{stream}_c{v}" for stream in ["mu", "e", "tau", "met"] for v in "1234" + f"data_{stream}_c{v}" for stream in ["mu", "e", "tau"] for v in "1234" ]), *if_era(year=2023, tag="postBPix", values=[ - f"data_{stream}_d{v}" for stream in ["mu", "e", "tau", "met"] for v in "12" + f"data_{stream}_d{v}" for stream in ["mu", "e", "tau"] for v in "12" ]), ] for dataset_name in dataset_names: @@ -352,8 +353,19 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] dataset.add_tag({"has_top", "single_top", "st"}) if dataset.name.startswith("dy_"): dataset.add_tag("dy") - if re.match(r"^(ww|wz|zz)_.*_pythia$", dataset.name): + if dataset.name.startswith("w_lnu_"): + dataset.add_tag("w_lnu") + # datasets that are known to have no lhe info at all + if law.util.multi_match(dataset.name, [ + r"^(ww|wz|zz)_.*pythia$", + r"^tt(w|z)_.*amcatnlo$", + r"^hh_ggf_hbb_htt_kl[^1]+_kt1_powheg$", # only SM model has LHE weighs, TODO: in all configs? 
+ ]): dataset.add_tag("no_lhe_weights") + # datasets that are allowed to contain some events with missing lhe infos + # (known to happen for amcatnlo) + if dataset.name.endswith("_amcatnlo"): + dataset.add_tag("partial_lhe_weights") if dataset_name.startswith("hh_"): dataset.add_tag("signal") dataset.add_tag("nonresonant_signal") @@ -409,6 +421,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] cfg.x.process_groups = { "signals": [ "hh_ggf_hbb_htt_kl1_kt1", + "hh_vbf_hbb_htt_kv1_k2v1_kl1", ], "signals_ggf": [ "hh_ggf_hbb_htt_kl0_kt1", @@ -464,7 +477,11 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # dataset groups for conveniently looping over certain datasets # (used in wrapper_factory and during plotting) - cfg.x.dataset_groups = {} + cfg.x.dataset_groups = { + "dy": [dataset.name for dataset in cfg.datasets if dataset.has_tag("dy")], + "w_lnu": [dataset.name for dataset in cfg.datasets if dataset.has_tag("w_lnu")], + # TODO: resonant (mostly for excluding them) + } # category groups for conveniently looping over certain categories # (used during plotting) @@ -485,6 +502,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] } cfg.x.default_selector_steps = "default" + # plotting overwrites cfg.x.custom_style_config_groups = { "small_legend": { "legend_cfg": {"ncols": 2, "fontsize": 16, "columnspacing": 0.6}, @@ -1280,10 +1298,10 @@ def get_dataset_lfns( lfn_base = dir_cls(path, fs=fs) # loop though files and interpret paths as lfns - return [ + return sorted( "/" + lfn_base.child(basename, type="f").path.lstrip("/") for basename in lfn_base.listdir(pattern="*.root") - ] + ) # define the lfn retrieval function cfg.x.get_dataset_lfns = get_dataset_lfns diff --git a/hbt/config/triggers.py b/hbt/config/triggers.py index 0fc77340..6f882d3b 100644 --- a/hbt/config/triggers.py +++ b/hbt/config/triggers.py @@ -66,27 +66,12 @@ from __future__ import annotations import functools -from dataclasses import dataclass import order as od from columnflow.util import DotDict -from columnflow.types import ClassVar -from hbt.config.util import Trigger, TriggerLeg - - -@dataclass -class Bits: - v12: int | None = None - v14: int | None = None - - supported_versions: ClassVar[set[int]] = {12, 14} - - def get(self, nano_version: int) -> int: - if nano_version not in self.supported_versions: - raise ValueError(f"nano_version {nano_version} not supported") - return getattr(self, f"v{nano_version}") +from hbt.config.util import Trigger, TriggerLeg, TriggerBits as Bits # use the CCLub names for the trigger bits and improve them when necessary @@ -99,10 +84,10 @@ def get(self, nano_version: int) -> int: # last update in https://github.com/cms-sw/cmssw/blob/CMSSW_14_0_X/PhysicsTools/NanoAOD/python/triggerObjects_cff.py "e": { - "CaloIdLTrackIdLIsoVL": Bits(v12=1, v14=1), - "WPTightTrackIso": Bits(v12=2, v14=2), - "WPLooseTrackIso": Bits(v12=4, v14=4), - "OverlapFilterPFTau": Bits(v12=8, v14=8), + "CaloIdLTrackIdLIsoVL": Bits(v12=1, v14="v12"), + "WPTightTrackIso": Bits(v12=2, v14="v12"), + "WPLooseTrackIso": Bits(v12=4, v14="v12"), + "OverlapFilterPFTau": Bits(v12=8, v14="v12"), "DiElectron": Bits(v12=16), "DiElectronLeg1": Bits(v14=16), "DiElectronLeg2": Bits(v14=32), @@ -120,19 +105,19 @@ def get(self, nano_version: int) -> int: "EleTauPNet": Bits(v14=131072), }, "mu": { - "TrkIsoVVL": Bits(v12=1, v14=1), - "Iso": Bits(v12=2, v14=2), - "OverlapFilterPFTau": Bits(v12=4, v14=4), - "SingleMuon": Bits(v12=8, v14=8), 
- "DiMuon": Bits(v12=16, v14=16), - "MuEle": Bits(v12=32, v14=32), - "MuTau": Bits(v12=64, v14=64), - "TripleMuon": Bits(v12=128, v14=128), - "DiMuonSingleEle": Bits(v12=256, v14=256), - "SingleMuonDiEle": Bits(v12=512, v14=512), - "Mu50": Bits(v12=1024, v14=1024), - "Mu100": Bits(v12=2048, v14=2048), - "SingleMuonSinglePhoton": Bits(v12=4096, v14=4096), + "TrkIsoVVL": Bits(v12=1, v14="v12"), + "Iso": Bits(v12=2, v14="v12"), + "OverlapFilterPFTau": Bits(v12=4, v14="v12"), + "SingleMuon": Bits(v12=8, v14="v12"), + "DiMuon": Bits(v12=16, v14="v12"), + "MuEle": Bits(v12=32, v14="v12"), + "MuTau": Bits(v12=64, v14="v12"), + "TripleMuon": Bits(v12=128, v14="v12"), + "DiMuonSingleEle": Bits(v12=256, v14="v12"), + "SingleMuonDiEle": Bits(v12=512, v14="v12"), + "Mu50": Bits(v12=1024, v14="v12"), + "Mu100": Bits(v12=2048, v14="v12"), + "SingleMuonSinglePhoton": Bits(v12=4096, v14="v12"), "MuTauPNet": Bits(v14=8192), }, "tau": { # general comment: lot of v14 paths contain PNet paths, not available in v12, e.g. OverlapFilterIsoEle @@ -142,7 +127,7 @@ def get(self, nano_version: int) -> int: "Medium": Bits(v14=2), "TightChargedIso": Bits(v12=4), "Tight": Bits(v14=4), - "DeepTau": Bits(v12=8, v14=8), + "DeepTau": Bits(v12=8, v14="v12"), "PNet": Bits(v14=16), "TightOOSCPhotons": Bits(v12=16), "HPS": Bits(v12=32, v14=268435456), @@ -161,7 +146,7 @@ def get(self, nano_version: int) -> int: "VBFpDoublePFTau_run3": Bits(v12=4096), # warning: this trigger bit expects "ChargedIso" in the filter name, this does not correspond to our actual VBF filter name # noqa "DiTau": Bits(v14=2048), "DiPFJetAndDiTau": Bits(v12=8192), - "DiTauAndPFJet": Bits(v12=16384, v14=16384), + "DiTauAndPFJet": Bits(v12=16384, v14="v12"), "DisplacedTau": Bits(v12=32768), "ETauDisplaced": Bits(v14=32768), "MuTauDisplaced": Bits(v14=65536), @@ -182,37 +167,37 @@ def get(self, nano_version: int) -> int: "VBFSingleTau": Bits(v14=1073741824), }, "jet": { - "4PixelOnlyPFCentralJetTightIDPt20": Bits(v12=1, v14=1), - "3PixelOnlyPFCentralJetTightIDPt30": Bits(v12=2, v14=2), - "PFJetFilterTwoC30": Bits(v12=4, v14=4), - "4PFCentralJetTightIDPt30": Bits(v12=8, v14=8), - "4PFCentralJetTightIDPt35": Bits(v12=16, v14=16), - "QuadCentralJet30": Bits(v12=32, v14=32), - "2PixelOnlyPFCentralJetTightIDPt40": Bits(v12=64, v14=64), - "L1sTripleJetVBF_orHTT_orDoubleJet_orSingleJet": Bits(v12=128, v14=128), - "3PFCentralJetTightIDPt40": Bits(v12=256, v14=256), - "3PFCentralJetTightIDPt45": Bits(v12=512, v14=512), - "L1sQuadJetsHT": Bits(v12=1024, v14=1024), - "BTagCaloDeepCSVp17Double": Bits(v12=2048, v14=2048), - "PFCentralJetLooseIDQuad30": Bits(v12=4096, v14=4096), - "1PFCentralJetLooseID75": Bits(v12=8192, v14=8192), - "2PFCentralJetLooseID60": Bits(v12=16384, v14=16384), - "3PFCentralJetLooseID45": Bits(v12=32768, v14=32768), - "4PFCentralJetLooseID40": Bits(v12=65536, v14=65536), - "DoubleTau+Jet": Bits(v12=131072, v14=131072), # v14 also contains PNet paths - "VBFcrossCleanedDeepTauPFTau": Bits(v12=262144, v14=262144), # more general VBFDiTauJets in v14 TODO: change name? # noqa - "VBFcrossCleanedUsingDijetCorr": Bits(v12=524288, v14=524288), # more general VBFSingleTauJets in v14 TODO: change name? 
# noqa - "MonitoringMuon+Tau+Jet": Bits(v12=1048576, v14=1048576), - "2PFCentralJetTightIDPt50": Bits(v12=2097152, v14=2097152), - "1PixelOnlyPFCentralJetTightIDPt60": Bits(v12=4194304, v14=4194304), - "1PFCentralJetTightIDPt70": Bits(v12=8388608, v14=8388608), - "BTagPFDeepJet1p5Single": Bits(v12=16777216, v14=16777216), - "BTagPFDeepJet4p5Triple": Bits(v12=33554432, v14=33554432), - "2BTagSumOR2BTagMeanPaths": Bits(v12=67108864, v14=67108864), - "2/1PixelOnlyPFCentralJetTightIDPt20/50": Bits(v12=134217728, v14=134217728), - "2PFCentralJetTightIDPt30": Bits(v12=268435456, v14=268435456), - "1PFCentralJetTightIDPt60": Bits(v12=536870912, v14=536870912), - "PF2CentralJetPt30PNet2BTagMean0p50": Bits(v12=1073741824, v14=1073741824), + "4PixelOnlyPFCentralJetTightIDPt20": Bits(v12=1, v14="v12"), + "3PixelOnlyPFCentralJetTightIDPt30": Bits(v12=2, v14="v12"), + "PFJetFilterTwoC30": Bits(v12=4, v14="v12"), + "4PFCentralJetTightIDPt30": Bits(v12=8, v14="v12"), + "4PFCentralJetTightIDPt35": Bits(v12=16, v14="v12"), + "QuadCentralJet30": Bits(v12=32, v14="v12"), + "2PixelOnlyPFCentralJetTightIDPt40": Bits(v12=64, v14="v12"), + "L1sTripleJetVBF_orHTT_orDoubleJet_orSingleJet": Bits(v12=128, v14="v12"), + "3PFCentralJetTightIDPt40": Bits(v12=256, v14="v12"), + "3PFCentralJetTightIDPt45": Bits(v12=512, v14="v12"), + "L1sQuadJetsHT": Bits(v12=1024, v14="v12"), + "BTagCaloDeepCSVp17Double": Bits(v12=2048, v14="v12"), + "PFCentralJetLooseIDQuad30": Bits(v12=4096, v14="v12"), + "1PFCentralJetLooseID75": Bits(v12=8192, v14="v12"), + "2PFCentralJetLooseID60": Bits(v12=16384, v14="v12"), + "3PFCentralJetLooseID45": Bits(v12=32768, v14="v12"), + "4PFCentralJetLooseID40": Bits(v12=65536, v14="v12"), + "DoubleTau+Jet": Bits(v12=131072, v14="v12"), # v14 also contains PNet paths + "VBFcrossCleanedDeepTauPFTau": Bits(v12=262144, v14="v12"), # more general VBFDiTauJets in v14 TODO: change name? # noqa + "VBFcrossCleanedUsingDijetCorr": Bits(v12=524288, v14="v12"), # more general VBFSingleTauJets in v14 TODO: change name? 
# noqa + "MonitoringMuon+Tau+Jet": Bits(v12=1048576, v14="v12"), + "2PFCentralJetTightIDPt50": Bits(v12=2097152, v14="v12"), + "1PixelOnlyPFCentralJetTightIDPt60": Bits(v12=4194304, v14="v12"), + "1PFCentralJetTightIDPt70": Bits(v12=8388608, v14="v12"), + "BTagPFDeepJet1p5Single": Bits(v12=16777216, v14="v12"), + "BTagPFDeepJet4p5Triple": Bits(v12=33554432, v14="v12"), + "2BTagSumOR2BTagMeanPaths": Bits(v12=67108864, v14="v12"), + "2/1PixelOnlyPFCentralJetTightIDPt20/50": Bits(v12=134217728, v14="v12"), + "2PFCentralJetTightIDPt30": Bits(v12=268435456, v14="v12"), + "1PFCentralJetTightIDPt60": Bits(v12=536870912, v14="v12"), + "PF2CentralJetPt30PNet2BTagMean0p50": Bits(v12=1073741824, v14="v12"), }, }) @@ -1308,7 +1293,8 @@ def add_triggers_2023(config: od.Config) -> None: # WPTightTrackIso trigger_bits=get_bit_sum_v("e", [ "WPTightTrackIso", - "Tight" if nano_version == 14 else None, + # TODO: Tight not existing, needs fixing + # "Tight" if nano_version == 14 else None, ]), ), ), diff --git a/hbt/config/util.py b/hbt/config/util.py index 9daad8ad..26be7ef9 100644 --- a/hbt/config/util.py +++ b/hbt/config/util.py @@ -6,10 +6,13 @@ from __future__ import annotations +import re +from dataclasses import dataclass + from order import UniqueObject, TagMixin from order.util import typed -from columnflow.types import Callable, Any, Sequence, Hashable +from columnflow.types import Callable, Any, Sequence, Hashable, ClassVar class TriggerLeg(object): @@ -224,3 +227,42 @@ def n_legs(self): def hlt_field(self): # remove the first four "HLT_" characters return self.name[4:] + + +@dataclass +class TriggerBits: + """ + Lightweight container wrapping trigger bits for different versions of NanoAOD. + """ + + v12: int | None = None + v14: int | None = None + + supported_versions: ClassVar[set[int]] = {12, 14} + + def __post_init__(self) -> None: + # versions might be strings such as "v12" that act as references + cre = re.compile(r"^v(\d+)$") + for v in self.supported_versions: + attr = f"v{v}" + # only check strings + if not isinstance((val := getattr(self, attr)), str): + continue + # check format + if not (m := cre.match(val)): + raise ValueError(f"invalid reference {attr} -> {val}") + # check if not circular + ref_v = int(m.group(1)) + if v == ref_v: + raise ValueError(f"reference to same version {attr} -> {val}") + # check type of referred value + ref_val = getattr(self, f"v{ref_v}") + if not isinstance(ref_val, int) and v is not None: + raise ValueError(f"wrong reference value in {attr} -> {val}") + # set it + setattr(self, attr, ref_val) + + def get(self, nano_version: int) -> int: + if nano_version not in self.supported_versions: + raise ValueError(f"nano_version {nano_version} not supported") + return getattr(self, f"v{nano_version}") diff --git a/hbt/production/btag.py b/hbt/production/btag.py index 907da7ec..6a594553 100644 --- a/hbt/production/btag.py +++ b/hbt/production/btag.py @@ -93,24 +93,25 @@ def _normalized_btag_weights_init(self: Producer) -> None: @_normalized_btag_weights.requires def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @_normalized_btag_weights.setup def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: 
dict, reader_targets: InsertableDict) -> None: + # BUG in prod3: some stats fields were missing so skip them for now + return + # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # get the unique process ids in that dataset - key = f"sum_mc_weight_selected_nob_{self.tagger_name}_per_process_and_njet" + key = f"sum_btag_weight_{self.tagger_name}_selected_nob_{self.tagger_name}_per_process_and_njet" self.unique_process_ids = list(map(int, selection_stats[key].keys())) # get the maximum numbers of jets diff --git a/hbt/production/weights.py b/hbt/production/weights.py index 716cbc96..e63a36c2 100644 --- a/hbt/production/weights.py +++ b/hbt/production/weights.py @@ -60,11 +60,9 @@ def normalized_pu_weight_init(self: Producer) -> None: @normalized_pu_weight.requires def normalized_pu_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @@ -78,7 +76,7 @@ def normalized_pu_weight_setup( # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # get the unique process ids in that dataset @@ -126,11 +124,9 @@ def normalized_pdf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Arra @normalized_pdf_weight.requires def normalized_pdf_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @@ -144,7 +140,7 @@ def normalized_pdf_weight_setup( # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # save average weights @@ -180,11 +176,9 @@ def normalized_murmuf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.A @normalized_murmuf_weight.requires def normalized_murmuf_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @@ -198,7 +192,7 @@ def normalized_murmuf_weight_setup( # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # save average weights diff --git a/hbt/util.py b/hbt/util.py index 
0c3055de..0e4d18a5 100644 --- a/hbt/util.py +++ b/hbt/util.py @@ -58,7 +58,7 @@ def IF_DATASET_HAS_LHE_WEIGHTS( if getattr(func, "dataset_inst", None) is None: return self.get() - return None if func.dataset_inst.has_tag("no_lhe_weights") else self.get() + return self.get() if not func.dataset_inst.has_tag("no_lhe_weights") else None @deferred_column diff --git a/law.cfg b/law.cfg index 312a3107..fc6509b6 100644 --- a/law.cfg +++ b/law.cfg @@ -30,6 +30,7 @@ mattermost_user: HH → bb𝛕𝛕 job_file_dir: $CF_JOB_BASE job_file_dir_cleanup: False job_file_dir_mkdtemp: sub_{{task_id}}_XXX +crab_sandbox_name: CMSSW_14_2_1::arch=el9_amd64_gcc12 # @@ -65,6 +66,7 @@ skip_ensure_proxy: False # (resources like memory and disk can also be set in [resources] with more granularity) htcondor_flavor: $CF_HTCONDOR_FLAVOR htcondor_share_software: True +# 2GB -> short "lite" queue, otherwise long "bide" queue on the naf htcondor_memory: 2GB htcondor_disk: 5GB slurm_flavor: $CF_SLURM_FLAVOR @@ -192,7 +194,7 @@ cf.CreateSyncFile: wlcg [resources] # default selection with hhbtag requires more memory -cf.{Select,Reduce}Events__sel_default: htcondor_memory=5GB +cf.{Select,Reduce}Events__sel_default: htcondor_memory=5GB, crab_memory=5000MB # diff --git a/modules/cmsdb b/modules/cmsdb index f2b84f7c..fcdb5d9c 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit f2b84f7cceaea02a3bc5270436369da6562ecd68 +Subproject commit fcdb5d9ca251b178a29ebdecfb1eb66802e9b1ec diff --git a/modules/columnflow b/modules/columnflow index 52207354..dd0655af 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 522073541c68d16938daa1bd1b137664dfc6d332 +Subproject commit dd0655afda8756c99bc434a901929f0a028dacf9 From f31f7690661d140a07bbd3caf6f028355e0d08d0 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Mon, 13 Jan 2025 09:30:42 +0100 Subject: [PATCH 14/20] Port stitching updates from #55. --- hbt/config/configs_hbt.py | 16 ++- hbt/production/processes.py | 258 +++++++++++++++++++++++------------- hbt/selection/default.py | 51 ++++--- hbt/util.py | 11 ++ 4 files changed, 226 insertions(+), 110 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 73b70e73..9a7ab0c3 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -473,7 +473,21 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] }, } # w+jets - # TODO: add + cfg.x.w_lnu_stitching = { + "incl": { + "inclusive_dataset": cfg.datasets.n.w_lnu_amcatnlo, + "leaf_processes": [ + # the following processes cover the full njet and pt phasespace + procs.n.w_lnu_0j, + *( + procs.get(f"w_lnu_{nj}j_pt{pt}") + for nj in [1, 2] + for pt in ["0to40", "40to100", "100to200", "200to400", "400to600", "600toinf"] + ), + procs.n.w_lnu_ge3j, + ], + }, + } # dataset groups for conveniently looping over certain datasets # (used in wrapper_factory and during plotting) diff --git a/hbt/production/processes.py b/hbt/production/processes.py index 17e39011..200bd646 100644 --- a/hbt/production/processes.py +++ b/hbt/production/processes.py @@ -4,15 +4,18 @@ Process ID producer relevant for the stitching of the DY samples. 
""" -import functools +from __future__ import annotations + +import abc import law +import order -from columnflow.production import Producer, producer +from columnflow.production import Producer from columnflow.util import maybe_import, InsertableDict -from columnflow.columnar_util import set_ak_column +from columnflow.columnar_util import set_ak_column, Route -from hbt.util import IF_DATASET_IS_DY +from hbt.util import IF_DATASET_IS_DY, IF_DATASET_IS_W_LNU np = maybe_import("numpy") ak = maybe_import("awkward") @@ -25,88 +28,160 @@ NJetsRange = tuple[int, int] PtRange = tuple[float, float] -set_ak_column_i64 = functools.partial(set_ak_column, value_type=np.int64) +class stitched_process_ids(Producer): + """General class to calculate process ids for stitched samples. -@producer( - uses={IF_DATASET_IS_DY("LHE.NpNLO", "LHE.Vpt")}, - produces={IF_DATASET_IS_DY("process_id")}, -) -def process_ids_dy(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - """ - Assigns each dy event a single process id, based on the number of jets and the di-lepton pt of - the LHE record. This is used for the stitching of the DY samples. + Individual producers should derive from this class and set the following attributes: + + :param id_table: scipy lookup table mapping processes variables (using key_func) to process ids + :param key_func: function to generate keys for the lookup, receiving values of stitching columns + :param stitching_columns: list of observables to use for stitching + :param cross_check_translation_dict: dictionary to translate stitching columns to auxiliary + fields of process objects, used for cross checking the validity of obtained ranges + :param include_condition: condition for including stitching columns in used columns """ - # as always, we assume that each dataset has exactly one process associated to it - if len(self.dataset_inst.processes) != 1: - raise NotImplementedError( - f"dataset {self.dataset_inst.name} has {len(self.dataset_inst.processes)} processes " - "assigned, which is not yet implemented", - ) - process_inst = self.dataset_inst.processes.get_first() - - # get the number of nlo jets and the di-lepton pt - njets = events.LHE.NpNLO - pt = events.LHE.Vpt - - # raise a warning if a datasets was already created for a specific "bin" (leaf process), - # but actually does not fit - njets_range = process_inst.x("njets", None) - if njets_range is not None: - outliers = (njets < njets_range[0]) | (njets >= njets_range[1]) - if ak.any(outliers): - logger.warning( - f"dataset {self.dataset_inst.name} is meant to contain njet values in the range " - f"[{njets_range[0]}, {njets_range[0]}), but found {ak.sum(outliers)} events " - "outside this range", + + @abc.abstractproperty + def id_table(self) -> sp.sparse._lil.lil_matrix: + # must be overwritten by inheriting classes + ... + + @abc.abstractmethod + def key_func(self, *values: ak.Array) -> int: + # must be overwritten by inheriting classes + ... + + @abc.abstractproperty + def cross_check_translation_dict(self) -> dict[str, str]: + # must be overwritten by inheriting classes + ... 
+
+    def init_func(self, *args, **kwargs):
+        # if there is an include_condition set, apply it to both used and produced columns
+        cond = lambda args: {self.include_condition(*args)} if self.include_condition else {*args}
+        self.uses |= cond(self.stitching_columns or [])
+        self.produces |= cond(["process_id"])
+
+    def call_func(self, events: ak.Array, **kwargs) -> ak.Array:
+        """
+        Assigns each event a single process id, based on the stitching values extracted per event.
+        This id can be used for the stitching of the respective datasets downstream.
+        """
+        # ensure that each dataset has exactly one process associated to it
+        if len(self.dataset_inst.processes) != 1:
+            raise NotImplementedError(
+                f"dataset {self.dataset_inst.name} has {len(self.dataset_inst.processes)} processes "
+                "assigned, which is not yet implemented",
             )
-    pt_range = process_inst.x("ptll", None)
-    if pt_range is not None:
-        outliers = (pt < pt_range[0]) | (pt >= pt_range[1])
-        if ak.any(outliers):
-            logger.warning(
-                f"dataset {self.dataset_inst.name} is meant to contain ptll values in the range "
-                f"[{pt_range[0]}, {pt_range[1]}), but found {ak.sum(outliers)} events outside this "
-                "range",
+        process_inst = self.dataset_inst.processes.get_first()
+
+        # get stitching observables
+        stitching_values = [Route(obs).apply(events) for obs in self.stitching_columns]
+
+        # run the cross check function if defined
+        if callable(self.stitching_range_cross_check):
+            self.stitching_range_cross_check(process_inst, stitching_values)
+
+        # lookup the id and check for invalid values
+        process_ids = np.squeeze(np.asarray(self.id_table[self.key_func(*stitching_values)].todense()))
+        invalid_mask = process_ids == 0
+        if ak.any(invalid_mask):
+            raise ValueError(
+                f"found {sum(invalid_mask)} events that could not be assigned to a process",
             )
-    # lookup the id and check for invalid values
-    process_ids = np.squeeze(np.asarray(self.id_table[self.key_func(njets, pt)].todense()))
-    invalid_mask = process_ids == 0
-    if ak.any(invalid_mask):
-        raise ValueError(
-            f"found {sum(invalid_mask)} dy events that could not be assigned to a process",
-        )
-
-    # store them
-    events = set_ak_column_i64(events, "process_id", process_ids)
-
-    return events
-
-
-@process_ids_dy.setup
-def process_ids_dy_setup(
-    self: Producer,
-    reqs: dict,
-    inputs: dict,
-    reader_targets: InsertableDict,
-) -> None:
-    # define stitching ranges for the DY datasets covered by this producer's dy_inclusive_dataset
-    stitching_ranges: dict[NJetsRange, list[PtRange]] = {}
-    for proc in self.dy_leaf_processes:
-        njets = proc.x.njets
-        stitching_ranges.setdefault(njets, [])
-        if proc.has_aux("ptll"):
-            stitching_ranges[njets].append(proc.x.ptll)
-
-    # sort by the first element of the ptll range
-    sorted_stitching_ranges: list[tuple[NJetsRange, list[PtRange]]] = [
-        (nj_range, sorted(stitching_ranges[nj_range], key=lambda ptll_range: ptll_range[0]))
-        for nj_range in sorted(stitching_ranges.keys(), key=lambda nj_range: nj_range[0])
-    ]
-
-    # define a key function that maps njets and pt to a unique key for use in a lookup table
-    def key_func(njets, pt):
+        # store them
+        events = set_ak_column(events, "process_id", process_ids, value_type=np.int64)
+
+        return events
+
+    def stitching_range_cross_check(
+        self: Producer,
+        process_inst: order.Process,
+        stitching_values: list[ak.Array],
+    ) -> None:
+        # define lookup for stitching observable -> process auxiliary values to compare with
+        # raise a warning if a dataset was already created for a specific "bin" (leaf process),
+        # but
actually does not fit
+        for column, values in zip(self.stitching_columns, stitching_values):
+            aux_name = self.cross_check_translation_dict[str(column)]
+            if not process_inst.has_aux(aux_name):
+                continue
+            aux_min, aux_max = process_inst.x(aux_name)
+            outliers = (values < aux_min) | (values >= aux_max)
+            if ak.any(outliers):
+                logger.warning(
+                    f"dataset {self.dataset_inst.name} is meant to contain {aux_name} values in "
+                    f"the range [{aux_min}, {aux_max}), but found {ak.sum(outliers)} events "
+                    "outside this range",
+                )
+
+
+class stitched_process_ids_nj_pt(stitched_process_ids):
+    """
+    Process identifier for subprocesses spanned by a jet multiplicity and an optional pt range, such
+    as DY or W->lnu, which have (e.g.) "*_1j" as well as "*_1j_pt100to200" subprocesses.
+    """
+
+    # id table is set during setup, create a non-abstract class member in the meantime
+    id_table = None
+
+    # required aux fields
+    njets_aux = "njets"
+    pt_aux = "ptll"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # set during setup_func
+        self.sorted_stitching_ranges: list[tuple[NJetsRange, list[PtRange]]]
+
+        # check that aux fields are present in cross_check_translation_dict
+        for field in (self.njets_aux, self.pt_aux):
+            if field not in self.cross_check_translation_dict.values():
+                raise ValueError(f"field {field} must be present in cross_check_translation_dict")
+
+    @abc.abstractproperty
+    def leaf_processes(self) -> list[order.Process]:
+        # must be overwritten by inheriting classes
+        ...
+
+    def setup_func(
+        self,
+        reqs: dict,
+        inputs: dict,
+        reader_targets: InsertableDict,
+    ) -> None:
+        # define stitching ranges for the datasets covered by this producer's inclusive dataset
+        stitching_ranges: dict[NJetsRange, list[PtRange]] = {}
+        for proc in self.leaf_processes:
+            njets = proc.x(self.njets_aux)
+            stitching_ranges.setdefault(njets, [])
+            if proc.has_aux(self.pt_aux):
+                stitching_ranges[njets].append(proc.x(self.pt_aux))
+
+        # sort by the first element of the ptll range
+        self.sorted_stitching_ranges = [
+            (nj_range, sorted(stitching_ranges[nj_range], key=lambda ptll_range: ptll_range[0]))
+            for nj_range in sorted(stitching_ranges.keys(), key=lambda nj_range: nj_range[0])
+        ]
+
+        # define the lookup table
+        max_nj_bin = len(self.sorted_stitching_ranges)
+        max_pt_bin = max(map(len, stitching_ranges.values()))
+        self.id_table = sp.sparse.lil_matrix((max_nj_bin + 1, max_pt_bin + 1), dtype=np.int64)
+
+        # fill it
+        for proc in self.leaf_processes:
+            key = self.key_func(proc.x(self.njets_aux)[0], proc.x(self.pt_aux, [-1])[0])
+            self.id_table[key] = proc.id
+
+    def key_func(
+        self,
+        njets: int | np.ndarray,
+        pt: int | float | np.ndarray,
+    ) -> tuple[int, int] | tuple[np.ndarray, np.ndarray]:
         # potentially convert single values into arrays
         single = False
         if isinstance(njets, int):
@@ -118,7 +193,7 @@ def key_func(njets, pt):
         # map into bins (index 0 means no binning)
         nj_bins = np.zeros(len(njets), dtype=np.int32)
         pt_bins = np.zeros(len(pt), dtype=np.int32)
-        for nj_bin, (nj_range, pt_ranges) in enumerate(sorted_stitching_ranges, 1):
+        for nj_bin, (nj_range, pt_ranges) in enumerate(self.sorted_stitching_ranges, 1):
             # nj_bin
             nj_mask = (nj_range[0] <= njets) & (njets < nj_range[1])
             nj_bins[nj_mask] = nj_bin
@@ -129,14 +204,17 @@

         return (nj_bins[0], pt_bins[0]) if single else (nj_bins, pt_bins)

-        self.key_func = key_func
-        # define the lookup table
-        max_nj_bin = len(sorted_stitching_ranges)
-        max_pt_bin = max(map(len, stitching_ranges.values()))
-        self.id_table = sp.sparse.lil_matrix((max_nj_bin + 1, max_pt_bin + 1), dtype=np.int64)

+process_ids_dy = stitched_process_ids_nj_pt.derive("process_ids_dy", cls_dict={
+    "stitching_columns": ["LHE.NpNLO", "LHE.Vpt"],
+    "cross_check_translation_dict": {"LHE.NpNLO": "njets", "LHE.Vpt": "ptll"},
+    "include_condition": IF_DATASET_IS_DY,
+    # still misses leaf_processes, must be set dynamically
+})

-        # fill it
-        for proc in self.dy_leaf_processes:
-            key = key_func(proc.x.njets[0], proc.x("ptll", [-1])[0])
-            self.id_table[key] = proc.id
+process_ids_w_lnu = stitched_process_ids_nj_pt.derive("process_ids_w_lnu", cls_dict={
+    "stitching_columns": ["LHE.NpNLO", "LHE.Vpt"],
+    "cross_check_translation_dict": {"LHE.NpNLO": "njets", "LHE.Vpt": "ptll"},
+    "include_condition": IF_DATASET_IS_W_LNU,
+    # still misses leaf_processes, must be set dynamically
+})
diff --git a/hbt/selection/default.py b/hbt/selection/default.py
index b0957136..039ec45e 100644
--- a/hbt/selection/default.py
+++ b/hbt/selection/default.py
@@ -27,7 +27,7 @@
 from hbt.selection.trigger import trigger_selection
 from hbt.selection.lepton import lepton_selection
 from hbt.selection.jet import jet_selection
-from hbt.production.processes import process_ids_dy
+import hbt.production.processes as process_producers
 from hbt.production.btag import btag_weights_deepjet, btag_weights_pnet
 from hbt.production.features import cutflow_features
 from hbt.production.patches import patch_ecalBadCalibFilter
@@ -148,6 +148,8 @@ def default(
     # create process ids
     if self.process_ids_dy is not None:
         events = self[self.process_ids_dy](events, **kwargs)
+    elif self.process_ids_w_lnu is not None:
+        events = self[self.process_ids_w_lnu](events, **kwargs)
     else:
         events = self[process_ids](events, **kwargs)

@@ -189,24 +191,33 @@ def default_init(self: Selector) -> None:
     if getattr(self, "dataset_inst", None) is None:
         return

-    self.process_ids_dy: process_ids_dy | None = None
-    if self.dataset_inst.has_tag("dy"):
-        # check if this dataset is covered by any dy id producer
-        for name, dy_cfg in self.config_inst.x.dy_stitching.items():
-            dataset_inst = dy_cfg["inclusive_dataset"]
-            # the dataset is "covered" if its process is a subprocess of that of the dy dataset
-            if dataset_inst.has_process(self.dataset_inst.processes.get_first()):
-                self.process_ids_dy = process_ids_dy.derive(f"process_ids_dy_{name}", cls_dict={
-                    "dy_inclusive_dataset": dataset_inst,
-                    "dy_leaf_processes": dy_cfg["leaf_processes"],
-                })
-
-                # add it as a dependency
-                self.uses.add(self.process_ids_dy)
-                self.produces.add(self.process_ids_dy)
-
-                # stop after the first match
-                break
+    # build and store derived process id producers
+    for tag in ("dy", "w_lnu"):
+        prod_name = f"process_ids_{tag}"
+        setattr(self, prod_name, None)
+        if not self.dataset_inst.has_tag(tag):
+            continue
+        # check if the producer was already created and saved in the config
+        if (prod := self.config_inst.x(prod_name, None)) is None:
+            # check if this dataset is covered by any stitching id producer
+            for stitch_name, cfg in self.config_inst.x(f"{tag}_stitching").items():
+                incl_dataset_inst = cfg["inclusive_dataset"]
+                # the dataset is "covered" if its process is a subprocess of that of the inclusive dataset
+                if incl_dataset_inst.has_process(self.dataset_inst.processes.get_first()):
+                    base_prod = getattr(process_producers, prod_name)
+                    prod = base_prod.derive(f"{prod_name}_{stitch_name}", cls_dict={
+                        "leaf_processes": cfg["leaf_processes"],
+                    })
+                    # cache it
+                    self.config_inst.set_aux(prod_name, prod)
+                    # stop after the first match
+                    break
+        if prod is not None:
+            # add it as a dependency
+            self.uses.add(prod)
+            self.produces.add(prod)
+            # save it as an attribute
+            setattr(self, prod_name, prod)


 empty = default.derive("empty", cls_dict={})
@@ -288,6 +299,8 @@ def empty_call(
     # create process ids
     if self.process_ids_dy is not None:
         events = self[self.process_ids_dy](events, **kwargs)
+    elif self.process_ids_w_lnu is not None:
+        events = self[self.process_ids_w_lnu](events, **kwargs)
     else:
         events = self[process_ids](events, **kwargs)

diff --git a/hbt/util.py b/hbt/util.py
index 0e4d18a5..20f37848 100644
--- a/hbt/util.py
+++ b/hbt/util.py
@@ -72,6 +72,17 @@ def IF_DATASET_IS_DY(
     return self.get() if func.dataset_inst.has_tag("dy") else None


+@deferred_column
+def IF_DATASET_IS_W_LNU(
+    self: ArrayFunction.DeferredColumn,
+    func: ArrayFunction,
+) -> Any | set[Any]:
+    if getattr(func, "dataset_inst", None) is None:
+        return self.get()
+
+    return self.get() if func.dataset_inst.has_tag("w_lnu") else None
+
+
 def hash_events(arr: np.ndarray) -> np.ndarray:
     """
     Helper function to create a hash value from the event, run and luminosityBlock columns.

From 2efea55d7e1516f4cd29f89179d7ffe71337a169 Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Mon, 13 Jan 2025 15:06:31 +0100
Subject: [PATCH 15/20] Adapt to cf changes.

---
 hbt/config/configs_hbt.py | 1 +
 hbt/production/btag.py | 40 +++++++++++++++++---------------
 hbt/selection/default.py | 27 ++++++++++++++++++-----
 law.cfg | 4 +++-
 modules/columnflow | 2 +-
 setup.sh | 45 ++++++++++++++++++++--------------------
 6 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
index 9a7ab0c3..d329f105 100644
--- a/hbt/config/configs_hbt.py
+++ b/hbt/config/configs_hbt.py
@@ -441,6 +441,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
             "ewk",
         ]),
         "dy_split": [
+            # TODO
             # "dy_m4to10", "dy_m10to50", "dy_m50toinf",
             # "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j",
             # "dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", "dy_m50toinf_1j_pt200to400",
diff --git a/hbt/production/btag.py b/hbt/production/btag.py
index 6a594553..168ddaa6 100644
--- a/hbt/production/btag.py
+++ b/hbt/production/btag.py
@@ -47,23 +47,29 @@ def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.A
         if not weight_name.startswith(self.weight_name):
             continue

-        # create a weight vectors starting with ones for both weight variations, i.e.,
-        # nomalization per pid and normalization per pid and jet multiplicity
-        norm_weight_per_pid = np.ones(len(events), dtype=np.float32)
-        norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32)
-
-        # fill weights with a new mask per unique process id (mostly just one)
-        for pid in self.unique_process_ids:
-            pid_mask = events.process_id == pid
-            # single value
-            norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid]
-            # lookup table
-            n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1))
-            norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets]
-
-        # multiply with actual weight
-        norm_weight_per_pid = norm_weight_per_pid * events[weight_name]
-        norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name]
+        # BUG in prod3: some stats fields were missing so skip them for now
+        # # create weight vectors starting with ones for both weight variations, i.e.,
+        # # normalization per pid and normalization per pid and jet multiplicity
+        # norm_weight_per_pid = np.ones(len(events),
dtype=np.float32) + # norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32) + + # # fill weights with a new mask per unique process id (mostly just one) + # for pid in self.unique_process_ids: + # pid_mask = events.process_id == pid + # # single value + # norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] + # # lookup table + # n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1)) + # norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets] + + # # multiply with actual weight + # norm_weight_per_pid = norm_weight_per_pid * events[weight_name] + # norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name] + + # fake values + from columnflow.columnar_util import full_like + norm_weight_per_pid = full_like(events.event, 1.0, dtype=np.float32) + norm_weight_per_pid_njet = norm_weight_per_pid # store them events = set_ak_column_f32(events, f"normalized_{weight_name}", norm_weight_per_pid) diff --git a/hbt/selection/default.py b/hbt/selection/default.py index 039ec45e..7f09d2f0 100644 --- a/hbt/selection/default.py +++ b/hbt/selection/default.py @@ -120,7 +120,13 @@ def default( # pdf weights if self.has_dep(pdf_weights): - events = self[pdf_weights](events, outlier_log_mode="debug", **kwargs) + events = self[pdf_weights]( + events, + outlier_log_mode="debug", + # allow some datasets to contain a few events with missing lhe infos + invalid_weights_action="ignore" if self.dataset_inst.has_tag("partial_lhe_weights") else "raise", + **kwargs, + ) # renormalization/factorization scale weights if self.has_dep(murmuf_weights): @@ -181,6 +187,7 @@ def event_sel_nob(btag_weight_cls): "nob_pnet": event_sel_nob(btag_weights_pnet) if self.has_dep(btag_weights_pnet) else None, }, njets=results.x.n_central_jets, + **kwargs, ) return events, results @@ -326,6 +333,7 @@ def empty_call( "nob_pnet": results.event if self.has_dep(btag_weights_pnet) else None, }, njets=ak.num(events.Jet, axis=1), + **kwargs, ) return events, results @@ -360,6 +368,9 @@ def setup_and_increment_stats( event_sel_variations = {} event_sel_variations = {n: s for n, s in event_sel_variations.items() if s is not None} + # when a shift was requested, skip all other systematic variations + skip_shifts = self.global_shift_inst != "nominal" + # start creating a weight, group and group combination map weight_map = { "num_events": Ellipsis, @@ -379,18 +390,17 @@ def setup_and_increment_stats( # pu weights with variations for route in sorted(self[pu_weight].produced_columns): - name = str(route) - weight_map[f"sum_mc_weight_{name}"] = (events.mc_weight * events[name], Ellipsis) + weight_map[f"sum_mc_weight_{route}"] = (events.mc_weight * route.apply(events), Ellipsis) # pdf weights with variations if self.has_dep(pdf_weights): - for v in ["", "_up", "_down"]: + for v in (("",) if skip_shifts else ("", "_up", "_down")): weight_map[f"sum_pdf_weight{v}"] = events[f"pdf_weight{v}"] weight_map[f"sum_pdf_weight{v}_selected"] = (events[f"pdf_weight{v}"], event_sel) # mur/muf weights with variations if self.has_dep(murmuf_weights): - for v in ["", "_up", "_down"]: + for v in (("",) if skip_shifts else ("", "_up", "_down")): weight_map[f"sum_murmuf_weight{v}"] = events[f"murmuf_weight{v}"] weight_map[f"sum_murmuf_weight{v}_selected"] = (events[f"murmuf_weight{v}"], event_sel) @@ -402,6 +412,8 @@ def setup_and_increment_stats( weight_name = str(route) if not weight_name.startswith(prod.weight_name): continue + if skip_shifts and weight_name.endswith(("_up", "_down")): + 
continue
             weight_map[f"sum_{weight_name}"] = events[weight_name]
             weight_map[f"sum_{weight_name}_selected"] = (events[weight_name], event_sel)
             for var_name, var_sel in event_sel_variations.items():
@@ -427,6 +439,10 @@ def setup_and_increment_stats(
     # combinations
     group_combinations.append(("process", "njet"))

+    def skip_func(weight_name: str, group_names: list[str]) -> bool:
+        # TODO: add combinations that are not needed here
+        return False
+
     return self[increment_stats](
         events,
         results,
@@ -434,5 +450,6 @@
         weight_map=weight_map,
         group_map=group_map,
         group_combinations=group_combinations,
+        skip_func=skip_func,
         **kwargs,
     )
diff --git a/law.cfg b/law.cfg
index fc6509b6..cba7c5fc 100644
--- a/law.cfg
+++ b/law.cfg
@@ -188,7 +188,9 @@ cf.CreateSyncFile: wlcg

 [versions]

-# none yet
+# for first plots (13.1.2025)
+22pre_v14__cf.CalibrateEvents: prod3
+22pre_v14__cf.{SelectEvents,MergeSelectionStats,ReduceEvents,MergeReductionStats,ProvideReducedEvents}: prod3

 [resources]
diff --git a/modules/columnflow b/modules/columnflow
index dd0655af..2a9b095d 160000
--- a/modules/columnflow
+++ b/modules/columnflow
@@ -1 +1 @@
-Subproject commit dd0655afda8756c99bc434a901929f0a028dacf9
+Subproject commit 2a9b095d5bd067d06f3463a08edf1be84026d329
diff --git a/setup.sh b/setup.sh
index 977613d2..0e2d3e14 100644
--- a/setup.sh
+++ b/setup.sh
@@ -27,25 +27,34 @@ setup_hbt() {
     #   HBT_SETUP
     #       A flag that is set to 1 after the setup was successful.

+    #
+    # load cf setup helpers
+    #
+
+    local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
+    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+    local cf_base="${this_dir}/modules/columnflow"
+    CF_SKIP_SETUP="true" source "${cf_base}/setup.sh" "" || return "$?"
+
+    #
     # prevent repeated setups
-    if [ "${HBT_SETUP}" = "1" ] && [ "${CF_ON_SLURM}" != "1" ]; then
+    #
+
+    cf_export_bool HBT_SETUP
+    if ${HBT_SETUP} && ! ${CF_ON_SLURM}; then
         >&2 echo "the HH -> bbtautau analysis was already successfully set up"
         >&2 echo "re-running the setup requires a new shell"
         return "1"
     fi

-    #
     # prepare local variables
     #

-    local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
-    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
-    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
     local orig="${PWD}"
     local setup_name="${1:-default}"
     local setup_is_default="false"
-    local env_is_remote="$( [ "${CF_REMOTE_ENV}" = "1" ] && echo "true" || echo "false" )"
     [ "${setup_name}" = "default" ] && setup_is_default="true"

     # zsh options
@@ -54,7 +63,6 @@
         setopt globdots
     fi

-    #
     # global variables
     # (HBT = hh2bbtautau, CF = columnflow)

@@ -62,18 +70,15 @@

     # start exporting variables
     export HBT_BASE="${this_dir}"
-    export CF_BASE="${this_dir}/modules/columnflow"
+    export CF_BASE="${cf_base}"
     export CF_REPO_BASE="${HBT_BASE}"
     export CF_REPO_BASE_ALIAS="HBT_BASE"
     export CF_SETUP_NAME="${setup_name}"
     export CF_SCHEDULER_HOST="${CF_SCHEDULER_HOST:-naf-cms14.desy.de}"
     export CF_SCHEDULER_PORT="${CF_SCHEDULER_PORT:-8088}"

-    # load cf setup helpers
-    CF_SKIP_SETUP="1" source "${CF_BASE}/setup.sh" "" || return "$?"
-
     # interactive setup
-    if ! ${env_is_remote}; then
+    if !
${CF_REMOTE_ENV}; then
         cf_setup_interactive_body() {
             # the flavor will be cms
             export CF_FLAVOR="cms"
@@ -91,14 +96,12 @@
     export CF_VENV_BASE="${CF_VENV_BASE:-${CF_SOFTWARE_BASE}/venvs}"
     export CF_CMSSW_BASE="${CF_CMSSW_BASE:-${CF_SOFTWARE_BASE}/cmssw}"

-    #
     # common variables
     #

     cf_setup_common_variables || return "$?"

-    #
     # minimal local software setup
     #

@@ -110,23 +113,21 @@
     export PYTHONPATH="${HBT_BASE}:${HBT_BASE}/modules/cmsdb:${PYTHONPATH}"

     # initialize submodules
-    if ! ${env_is_remote} && [ -e "${HBT_BASE}/.git" ]; then
+    if ! ${CF_REMOTE_ENV} && [ -e "${HBT_BASE}/.git" ]; then
         local m
         for m in $( ls -1q "${HBT_BASE}/modules" ); do
             cf_init_submodule "${HBT_BASE}" "modules/${m}"
         done
     fi

-    #
     # git hooks
     #

-    if [ "${CF_LOCAL_ENV}" = "1" ]; then
+    if ${CF_LOCAL_ENV}; then
         cf_setup_git_hooks || return "$?"
     fi

-    #
     # law setup
     #

@@ -135,7 +136,7 @@
     export LAW_CONFIG_FILE="${LAW_CONFIG_FILE:-${HBT_BASE}/law.cfg}"

     # run the indexing when not remote
-    if ! ${env_is_remote} && which law &> /dev/null; then
+    if ! ${CF_REMOTE_ENV} && which law &> /dev/null; then
         # source law's bash completion script
         source "$( law completion )" ""

@@ -148,12 +149,12 @@

     # update the law config file to switch from mirrored to bare wlcg targets
     # as local mounts are typically not available remotely
-    if ${env_is_remote}; then
+    if ${CF_REMOTE_ENV}; then
         sed -i -r 's/(.+\: ?)wlcg_mirrored, local_.+, ?(wlcg_[^\s]+)/\1wlcg, \2/g' "${LAW_CONFIG_FILE}"
     fi

     # finalize
-    export HBT_SETUP="1"
+    export HBT_SETUP="true"
 }

 main() {
@@ -171,6 +172,6 @@
 }

 # entry point
-if [ "${HBT_SKIP_SETUP}" != "1" ]; then
+if [ "${HBT_SKIP_SETUP}" != "true" ]; then
     main "$@"
 fi

From f30eb56c6a79fd115d64192a8081d661b94cf75b Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Mon, 13 Jan 2025 17:57:19 +0100
Subject: [PATCH 16/20] Fix hhbtag score handling (@taaaeen).

---
 hbt/production/hhbtag.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hbt/production/hhbtag.py b/hbt/production/hhbtag.py
index 5e08ff73..cdf4ba8b 100644
--- a/hbt/production/hhbtag.py
+++ b/hbt/production/hhbtag.py
@@ -99,11 +99,11 @@ def split(where):
     even_mask = ak.to_numpy((events[event_mask].event % 2) == 0)
     if ak.sum(even_mask):
         input_features_even = split(even_mask)
-        scores_even = self.hhbtag_model_even(input_features_even)[0].numpy()
+        scores_even = self.hhbtag_model_even(input_features_even).numpy()
         scores[even_mask] = scores_even
     if ak.sum(~even_mask):
         input_features_odd = split(~even_mask)
-        scores_odd = self.hhbtag_model_odd(input_features_odd)[0].numpy()
+        scores_odd = self.hhbtag_model_odd(input_features_odd).numpy()
         scores[~even_mask] = scores_odd

     # remove the scores of padded jets

From a0efe7e08cd7cb2e7936a4c17649173b8824c68d Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Mon, 13 Jan 2025 19:34:24 +0100
Subject: [PATCH 17/20] Update cf.

---
 modules/columnflow | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/columnflow b/modules/columnflow
index 2a9b095d..693f14d6 160000
--- a/modules/columnflow
+++ b/modules/columnflow
@@ -1 +1 @@
-Subproject commit 2a9b095d5bd067d06f3463a08edf1be84026d329
+Subproject commit 693f14d6b188ae42197a824b39548b50a624d667

From dd45b7c5c0b61b31f1b0e6480898bc4606d43c5f Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Tue, 14 Jan 2025 08:33:43 +0100
Subject: [PATCH 18/20] Update plots.
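
Variables are now registered through a small add_variable helper that enables
underflow/overflow inclusion by default, and expressions may be callables that
receive the event array, together with an aux entry listing the columns they
need. A minimal sketch of the pattern (a hypothetical "dilep_pt" variable,
assuming coffea behaviors are attached, as for the dilep_mass test below):

    import awkward as ak

    def dilep_pt(events: ak.Array) -> ak.Array:
        # sum the four-vectors of the two leading leptons and return the pt
        leps = ak.concatenate([events.Electron * 1, events.Muon * 1], axis=1)[:, :2]
        return leps.sum(axis=1).pt

    add_variable(
        config,
        name="dilep_pt",
        expression=dilep_pt,
        aux={"inputs": ["{Electron,Muon}.{pt,eta,phi,mass}"]},
        binning=(40, 0.0, 400.0),
        unit="GeV",
        x_title=r"$p_{T}^{ll}$",
    )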
--- hbt/config/categories.py | 22 ++--- hbt/config/configs_hbt.py | 14 ++-- hbt/config/variables.py | 141 ++++++++++++++++++++++++--------- hbt/production/res_networks.py | 8 +- modules/columnflow | 2 +- 5 files changed, 130 insertions(+), 57 deletions(-) diff --git a/hbt/config/categories.py b/hbt/config/categories.py index 7f69a7bf..453f2337 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -16,18 +16,18 @@ def add_categories(config: od.Config) -> None: Adds all categories to a *config*. """ # lepton channels - add_category(config, name="etau", id=1, selection="cat_etau", label=r"$e\tau_{h}$") - add_category(config, name="mutau", id=2, selection="cat_mutau", label=r"$\mu\tau_{h}$") - add_category(config, name="tautau", id=3, selection="cat_tautau", label=r"$\tau_{h}\tau_{h}$") - add_category(config, name="ee", id=4, selection="cat_ee", label=r"$ee$") - add_category(config, name="mumu", id=5, selection="cat_mumu", label=r"$\mu\mu$") - add_category(config, name="emu", id=6, selection="cat_emu", label=r"$e\mu$") + add_category(config, name="etau", id=1, selection="cat_etau", label=config.channels.n.etau.label) + add_category(config, name="mutau", id=2, selection="cat_mutau", label=config.channels.n.mutau.label) + add_category(config, name="tautau", id=3, selection="cat_tautau", label=config.channels.n.tautau.label) + add_category(config, name="ee", id=4, selection="cat_ee", label=config.channels.n.ee.label) + add_category(config, name="mumu", id=5, selection="cat_mumu", label=config.channels.n.mumu.label) + add_category(config, name="emu", id=6, selection="cat_emu", label=config.channels.n.emu.label) # qcd regions - add_category(config, name="os", id=10, selection="cat_os", label="Opposite sign", tags={"os"}) - add_category(config, name="ss", id=11, selection="cat_ss", label="Same sign", tags={"ss"}) - add_category(config, name="iso", id=12, selection="cat_iso", label=r"$\tau_{h,2} isolated$", tags={"iso"}) - add_category(config, name="noniso", id=13, selection="cat_noniso", label=r"$\tau_{h,2} non-isolated$", tags={"noniso"}) # noqa: E501 + add_category(config, name="os", id=10, selection="cat_os", label="OS", tags={"os"}) + add_category(config, name="ss", id=11, selection="cat_ss", label="SS", tags={"ss"}) + add_category(config, name="iso", id=12, selection="cat_iso", label=r"$\tau_{h,2} iso$", tags={"iso"}) + add_category(config, name="noniso", id=13, selection="cat_noniso", label=r"$\tau_{h,2} non-iso$", tags={"noniso"}) # noqa: E501 # kinematic categories add_category(config, name="incl", id=100, selection="cat_incl", label="inclusive") @@ -57,6 +57,8 @@ def kwargs_fn(categories, add_qcd_group=True): "tags": set.union(*[cat.tags for cat in categories.values() if cat]), # auxiliary information "aux": aux, + # label + "label": [cat.label or name for name, cat in categories.items()] or None, } # main analysis categories diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index d329f105..aa9f174e 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -521,6 +521,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] cfg.x.custom_style_config_groups = { "small_legend": { "legend_cfg": {"ncols": 2, "fontsize": 16, "columnspacing": 0.6}, + "annotate_cfg": {"fontsize": 18, "style": "italic"}, + "cms_label": "wip", }, } cfg.x.default_custom_style_config = "small_legend" @@ -1239,12 +1241,12 @@ def add_external(name, value): 
################################################################################################ # channels - cfg.add_channel(name="etau", id=1) - cfg.add_channel(name="mutau", id=2) - cfg.add_channel(name="tautau", id=3) - cfg.add_channel(name="ee", id=4) - cfg.add_channel(name="mumu", id=5) - cfg.add_channel(name="emu", id=6) + cfg.add_channel(name="etau", id=1, label=r"$e\tau_{h}$") + cfg.add_channel(name="mutau", id=2, label=r"$\mu\tau_{h}$") + cfg.add_channel(name="tautau", id=3, label=r"$\tau_{h}\tau_{h}$") + cfg.add_channel(name="ee", id=4, label=r"$ee$") + cfg.add_channel(name="mumu", id=5, label=r"$\mu\mu$") + cfg.add_channel(name="emu", id=6, label=r"$e\mu$") # add categories from hbt.config.categories import add_categories diff --git a/hbt/config/variables.py b/hbt/config/variables.py index d3ec4994..681dced4 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -13,48 +13,55 @@ def add_variables(config: od.Config) -> None: """ Adds all variables to a *config*. """ - config.add_variable( + add_variable( + config, name="event", expression="event", binning=(1, 0.0, 1.0e9), x_title="Event number", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="run", expression="run", binning=(1, 100000.0, 500000.0), x_title="Run number", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="lumi", expression="luminosityBlock", binning=(1, 0.0, 5000.0), x_title="Luminosity block", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="n_jet", expression="n_jet", binning=(11, -0.5, 10.5), x_title="Number of jets", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="n_hhbtag", expression="n_hhbtag", binning=(4, -0.5, 3.5), x_title="Number of HH b-tags", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="ht", binning=[0, 80, 120, 160, 200, 240, 280, 320, 400, 500, 600, 800], unit="GeV", x_title="HT", ) - config.add_variable( + add_variable( + config, name="jet_pt", expression="Jet.pt", null_value=EMPTY_FLOAT, @@ -62,7 +69,8 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title=r"all Jet $p_{T}$", ) - config.add_variable( + add_variable( + config, name="jet1_pt", expression="Jet.pt[:,0]", null_value=EMPTY_FLOAT, @@ -70,14 +78,16 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title=r"Jet 1 $p_{T}$", ) - config.add_variable( + add_variable( + config, name="jet1_eta", expression="Jet.eta[:,0]", null_value=EMPTY_FLOAT, binning=(30, -3.0, 3.0), x_title=r"Jet 1 $\eta$", ) - config.add_variable( + add_variable( + config, name="jet2_pt", expression="Jet.pt[:,1]", null_value=EMPTY_FLOAT, @@ -85,21 +95,24 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title=r"Jet 2 $p_{T}$", ) - config.add_variable( + add_variable( + config, name="met_phi", expression="PuppiMET.phi", null_value=EMPTY_FLOAT, binning=(33, -3.3, 3.3), x_title=r"MET $\phi$", ) - config.add_variable( + add_variable( + config, name="e1_pt", expression="Electron.pt[:, 0]", null_value=EMPTY_FLOAT, binning=(40, 0, 400), x_title=r"Leading electron p$_{T}$", ) - config.add_variable( + add_variable( + config, name="mu1_pt", expression="Muon.pt[:,0]", null_value=EMPTY_FLOAT, @@ -108,37 +121,43 @@ def add_variables(config: od.Config) -> None: ) # weights - config.add_variable( + add_variable( + config, name="mc_weight", expression="mc_weight", binning=(200, -10, 10), x_title="MC weight", ) - config.add_variable( + add_variable( + config, name="pu_weight", expression="pu_weight", 
binning=(40, 0, 2), x_title="Pileup weight", ) - config.add_variable( + add_variable( + config, name="normalized_pu_weight", expression="normalized_pu_weight", binning=(40, 0, 2), x_title="Normalized pileup weight", ) - config.add_variable( + add_variable( + config, name="btag_weight", expression="btag_weight", binning=(60, 0, 3), x_title="b-tag weight", ) - config.add_variable( + add_variable( + config, name="normalized_btag_weight", expression="normalized_btag_weight", binning=(60, 0, 3), x_title="Normalized b-tag weight", ) - config.add_variable( + add_variable( + config, name="normalized_njet_btag_weight", expression="normalized_njet_btag_weight", binning=(60, 0, 3), @@ -146,40 +165,46 @@ def add_variables(config: od.Config) -> None: ) # cutflow variables - config.add_variable( + add_variable( + config, name="cf_njet", expression="cutflow.n_jet", binning=(17, -0.5, 16.5), x_title="Jet multiplicity", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="cf_ht", expression="cutflow.ht", binning=(40, 0.0, 400.0), unit="GeV", x_title=r"$H_{T}$", ) - config.add_variable( + add_variable( + config, name="cf_jet1_pt", expression="cutflow.jet1_pt", binning=(40, 0.0, 400.0), unit="GeV", x_title=r"Jet 1 $p_{T}$", ) - config.add_variable( + add_variable( + config, name="cf_jet1_eta", expression="cutflow.jet1_eta", binning=(40, -5.0, 5.0), x_title=r"Jet 1 $\eta$", ) - config.add_variable( + add_variable( + config, name="cf_jet1_phi", expression="cutflow.jet1_phi", binning=(32, -3.2, 3.2), x_title=r"Jet 1 $\phi$", ) - config.add_variable( + add_variable( + config, name="cf_jet2_pt", expression="cutflow.jet2_pt", binning=(40, 0.0, 400.0), @@ -188,21 +213,24 @@ def add_variables(config: od.Config) -> None: ) # variables of interest - config.add_variable( + add_variable( + config, name="hh_mass", expression="hh.mass", binning=(20, 250, 750.0), unit="GeV", x_title=r"$m_{hh}$", ) - config.add_variable( + add_variable( + config, name="hh_pt", expression="hh.pt", binning=(100, 0, 500.0), unit="GeV", x_title=r"$p_T$", ) - config.add_variable( + add_variable( + config, name="hh_eta", expression="hh.eta", binning=(100, -3.0, 3.0), @@ -210,21 +238,24 @@ def add_variables(config: od.Config) -> None: x_title=r"$\eta$", ) - config.add_variable( + add_variable( + config, name="ditau_mass", expression="diTau.mass", binning=(20, 50, 200.0), unit="GeV", x_title=r"$m_{\tau\tau}$", ) - config.add_variable( + add_variable( + config, name="ditau_pt", expression="diTau.pt", binning=(100, 0, 500.0), unit="GeV", x_title=r"$p_T$", ) - config.add_variable( + add_variable( + config, name="ditau_eta", expression="diTau.eta", binning=(100, -3.0, 3.0), @@ -232,21 +263,24 @@ def add_variables(config: od.Config) -> None: x_title=r"$\eta$", ) - config.add_variable( + add_variable( + config, name="dibjet_mass", expression="diBJet.mass", binning=(20, 0, 500.0), unit="GeV", x_title=r"$m_{bb}$", ) - config.add_variable( + add_variable( + config, name="dibjet_pt", expression="diBJet.pt", binning=(100, 0, 500.0), unit="GeV", x_title=r"$p_T$", ) - config.add_variable( + add_variable( + config, name="dibjet_eta", expression="diBJet.eta", binning=(100, -3.0, 3.0), @@ -254,9 +288,26 @@ def add_variables(config: od.Config) -> None: x_title=r"$\eta$", ) + def dilep_mass_test(events): + import awkward as ak + leps = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] + dilep = leps.sum(axis=1) + return dilep.mass + + add_variable( + config, + name="dilep_mass", + expression=dilep_mass_test, + 
aux={"inputs": ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"]}, + binning=(40, 40, 120), + unit="GeV", + x_title=r"$m_{ll}$", + ) + for proc in ["hh", "tt", "dy"]: # outputs of the resonant pDNN at SM-like mass and spin values - config.add_variable( + add_variable( + config, name=f"res_pdnn_{proc}", expression=f"res_pdnn_s0_m500_{proc}", binning=(25, 0.0, 1.0), @@ -264,16 +315,32 @@ def add_variables(config: od.Config) -> None: ) # outputs of the resonant DNN trained over flat masses - config.add_variable( + add_variable( + config, name=f"res_dnn_{proc}", expression=f"res_dnn_{proc}", binning=(25, 0.0, 1.0), x_title=rf"{proc.upper()} output node, res. DNN", ) - config.add_variable( + add_variable( + config, name=f"res_dnn_{proc}_fine", expression=f"res_dnn_{proc}", binning=(5000, 0.0, 1.0), x_title=rf"{proc.upper()} output node, res. DNN", ) + + +# helper to add a variable to the config with some defaults +def add_variable(config: od.Config, *args, **kwargs) -> od.Variable: + # create the variable + variable = config.add_variable(*args, **kwargs) + + # defaults + if not variable.has_aux("underflow"): + variable.x.underflow = True + if not variable.has_aux("overflow"): + variable.x.overflow = True + + return variable diff --git a/hbt/production/res_networks.py b/hbt/production/res_networks.py index d7484cef..bddd0a41 100644 --- a/hbt/production/res_networks.py +++ b/hbt/production/res_networks.py @@ -12,8 +12,10 @@ import law from columnflow.production import Producer, producer -from columnflow.production.util import attach_coffea_behavior, default_collections -from columnflow.columnar_util import set_ak_column, attach_behavior, flat_np_view, EMPTY_FLOAT +from columnflow.production.util import attach_coffea_behavior +from columnflow.columnar_util import ( + set_ak_column, attach_behavior, flat_np_view, EMPTY_FLOAT, default_coffea_collections, +) from columnflow.util import maybe_import, dev_sandbox, InsertableDict, DotDict np = maybe_import("numpy") @@ -68,7 +70,7 @@ def _res_dnn_evaluation( # ensure coffea behavior events = self[attach_coffea_behavior]( events, - collections={"HHBJet": default_collections["Jet"]}, + collections={"HHBJet": default_coffea_collections["Jet"]}, **kwargs, ) diff --git a/modules/columnflow b/modules/columnflow index 693f14d6..1b01b794 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 693f14d6b188ae42197a824b39548b50a624d667 +Subproject commit 1b01b7948c960474e1fff52c660d03960e6da83a From 92d00546a816b4fd4eca3220fb9b62e80ddbc4c5 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Tue, 14 Jan 2025 17:55:46 +0100 Subject: [PATCH 19/20] Make plot style CMS compatible. 
--- hbt/config/categories.py | 6 ++++- hbt/config/configs_hbt.py | 48 ++++++++++++++++++++++++++----------- hbt/config/styles.py | 50 +++++++++++++++++++++++++++++---------- hbt/selection/jet.py | 15 ++++++++---- modules/columnflow | 2 +- 5 files changed, 89 insertions(+), 32 deletions(-) diff --git a/hbt/config/categories.py b/hbt/config/categories.py index 453f2337..0d8aa4ad 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -58,7 +58,11 @@ def kwargs_fn(categories, add_qcd_group=True): # auxiliary information "aux": aux, # label - "label": [cat.label or name for name, cat in categories.items()] or None, + "label": [ + cat.label or cat.name + for cat in categories.values() + if cat.name != "os" # os is the default + ] or None, } # main analysis categories diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index aa9f174e..4132e8ab 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -103,6 +103,12 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] label="Multiboson", processes=[procs.n.vv, procs.n.vvv], ) + cfg.add_process( + name="all_v", + id=7996, + label="Multiboson", + processes=[cfg.processes.n.v, cfg.processes.n.multiboson], + ) cfg.add_process( name="tt_multiboson", id=7999, @@ -173,7 +179,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] cfg.add_process(proc) # configure colors, labels, etc - from hbt.config.styles import stylize_processes + from hbt.config.styles import stylize_processes, update_legend_labels stylize_processes(cfg) ################################################################################################ @@ -430,14 +436,13 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] "hh_ggf_hbb_htt_kl5_kt1", ], "backgrounds": (backgrounds := [ - "h", - "tt", "dy", + "tt", "qcd", "st", - "v", - "multiboson", "tt_multiboson", + "all_v", + "h", "ewk", ]), "dy_split": [ @@ -493,9 +498,17 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # dataset groups for conveniently looping over certain datasets # (used in wrapper_factory and during plotting) cfg.x.dataset_groups = { + "data": (data_group := [dataset.name for dataset in cfg.datasets if dataset.is_data]), + "backgrounds": (backgrounds := [ + dataset.name for dataset in cfg.datasets + if dataset.is_mc and not dataset.has_tag("signal") + ]), + "sm_ggf": (sm_ggf_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", *backgrounds]), + "sm": (sm_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph", *backgrounds]), + "sm_ggf_data": data_group + sm_ggf_group, + "sm_data": data_group + sm_group, "dy": [dataset.name for dataset in cfg.datasets if dataset.has_tag("dy")], "w_lnu": [dataset.name for dataset in cfg.datasets if dataset.has_tag("w_lnu")], - # TODO: resonant (mostly for excluding them) } # category groups for conveniently looping over certain categories @@ -513,15 +526,18 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # selector step groups for conveniently looping over certain steps # (used in cutflow tasks) cfg.x.selector_step_groups = { - "default": ["json", "trigger", "met_filter", "jet_veto_map", "lepton", "jet", "bjet"], + "default": ["json", "trigger", "met_filter", "jet_veto_map", "lepton", "jet2", "bjet"], } cfg.x.default_selector_steps = "default" # plotting overwrites cfg.x.custom_style_config_groups = { "small_legend": { - "legend_cfg": {"ncols": 2, "fontsize": 
16, "columnspacing": 0.6}, - "annotate_cfg": {"fontsize": 18, "style": "italic"}, + "legend_cfg": { + "ncols": 2, "fontsize": 16, "columnspacing": 0.6, "labelspacing": 0.275, + "update_handles_labels": update_legend_labels, + }, + "annotate_cfg": {"fontsize": 18}, "cms_label": "wip", }, } @@ -1307,14 +1323,18 @@ def get_dataset_lfns( path = f"store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0" # create the lfn base directory, local or remote - dir_cls = law.LocalDirectoryTarget - fs = f"local_fs_{cfg.campaign.x.custom['name']}" - if not law.config.has_section(fs): - dir_cls = law.wlcg.WLCGDirectoryTarget - fs = f"wlcg_fs_{cfg.campaign.x.custom['name']}" + dir_cls = law.wlcg.WLCGDirectoryTarget + fs = f"wlcg_fs_{cfg.campaign.x.custom['name']}" + local_fs = f"local_fs_{cfg.campaign.x.custom['name']}" + if law.config.has_section(local_fs): + base = law.target.file.remove_scheme(law.config.get_expanded(local_fs, "base")) + if os.path.exists(base): + dir_cls = law.LocalDirectoryTarget + fs = local_fs lfn_base = dir_cls(path, fs=fs) # loop though files and interpret paths as lfns + print(lfn_base) return sorted( "/" + lfn_base.child(basename, type="f").path.lstrip("/") for basename in lfn_base.listdir(pattern="*.root") diff --git a/hbt/config/styles.py b/hbt/config/styles.py index bc04cd7f..c1edfa7a 100644 --- a/hbt/config/styles.py +++ b/hbt/config/styles.py @@ -16,6 +16,7 @@ def stylize_processes(config: od.Config) -> None: cfg = config # recommended cms colors + # see https://cms-analysis.docs.cern.ch/guidelines/plotting/colors cfg.x.colors = DotDict( bright_blue="#3f90da", dark_blue="#011c87", @@ -32,7 +33,7 @@ def stylize_processes(config: od.Config) -> None: for kl in ["0", "1", "2p45", "5"]: if (p := config.get_process(f"hh_ggf_hbb_htt_kl{kl}_kt1", default=None)): - p.color1 = cfg.x.colors.bright_blue + p.color1 = cfg.x.colors.dark_blue p.label = ( r"$HH_{ggf} \rightarrow bb\tau\tau$ __SCALE__" "\n" @@ -40,7 +41,7 @@ def stylize_processes(config: od.Config) -> None: ) if (p := config.get_process("hh_vbf_hbb_htt_kv1_k2v1_kl1", default=None)): - p.color1 = cfg.x.colors.dark_blue + p.color1 = cfg.x.colors.brown p.label = ( r"$HH_{vbf} \rightarrow bb\tau\tau$ __SCALE__" "\n" @@ -48,40 +49,43 @@ def stylize_processes(config: od.Config) -> None: ) if (p := config.get_process("h", default=None)): - p.color1 = cfg.x.colors.purple + p.color1 = cfg.x.colors.teal if (p := config.get_process("tt", default=None)): p.color1 = cfg.x.colors.bright_orange p.label = r"$t\bar{t}$" if (p := config.get_process("st", default=None)): - p.color1 = cfg.x.colors.aubergine + p.color1 = cfg.x.colors.purple if (p := config.get_process("dy", default=None)): - p.color1 = cfg.x.colors.dark_orange + p.color1 = cfg.x.colors.bright_blue if (p := config.get_process("vv", default=None)): - p.color1 = cfg.x.colors.yellow + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("vvv", default=None)): - p.color1 = cfg.x.colors.yellow + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("multiboson", default=None)): - p.color1 = cfg.x.colors.yellow + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("w", default=None)): - p.color1 = cfg.x.colors.teal + p.color1 = cfg.x.colors.aubergine p.label = "W" if (p := config.get_process("z", default=None)): - p.color1 = cfg.x.colors.brown + p.color1 = cfg.x.colors.aubergine p.label = "Z" if (p := config.get_process("v", default=None)): - p.color1 = cfg.x.colors.teal + p.color1 = cfg.x.colors.aubergine + + if (p := 
config.get_process("all_v", default=None)): + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("ewk", default=None)): - p.color1 = cfg.x.colors.brown + p.color1 = cfg.x.colors.dark_orange if (p := config.get_process("ttv", default=None)): p.color1 = cfg.x.colors.grey @@ -96,3 +100,25 @@ def stylize_processes(config: od.Config) -> None: if (p := config.get_process("qcd", default=None)): p.color1 = cfg.x.colors.red + + +def update_legend_labels(ax, handles, labels, n_cols): + """ + Fill empty handles so that all backgrounds are in the first column. + """ + # only implemented for 2 columns so far + if n_cols != 2: + return + # get number of entries per column + n_backgrounds = sum(1 for handle in handles if handle.__class__.__name__ == "StepPatch") + n_other = len(handles) - n_backgrounds + if n_backgrounds == n_other: + return + + # fill left or right column + n_add = abs(n_backgrounds - n_other) + pos = len(handles) if n_backgrounds > n_other else n_backgrounds + empty_handle = ax.plot([], label="", linestyle="None")[0] + for _ in range(n_add): + handles.insert(pos, empty_handle) + labels.insert(pos, "") diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index 5d91c159..3bde8a30 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -268,7 +268,13 @@ def jet_selection( ) # final event selection (only looking at number of default jets for now) - jet_sel = ak.sum(default_mask, axis=1) >= 2 + # perform a cut on ≥1 jet and all other cuts first, and then cut on ≥2, resulting in an + # additional, _skippable_ step + jet_sel = ( + (ak.sum(default_mask, axis=1) >= 1) + # add additional cuts here in the future + ) + jet_sel2 = jet_sel & (ak.sum(default_mask, axis=1) >= 2) # some final type conversions jet_indices = ak.values_astype(ak.fill_none(jet_indices, 0), np.int32) @@ -284,11 +290,12 @@ def jet_selection( result = SelectionResult( steps={ "jet": jet_sel, + "jet2": jet_sel2, # the btag weight normalization requires a selection with everything but the bjet # selection, so add this step here - # note: there is currently no b-tag discriminant cut at this point, so take jet_sel - "bjet_deepjet": jet_sel, - "bjet_pnet": jet_sel, # no need in run 2 + # note: there is currently no b-tag discriminant cut at this point, so skip it + # "bjet_deepjet": jet_sel, + # "bjet_pnet": jet_sel, # no need in run 2 }, objects={ "Jet": { diff --git a/modules/columnflow b/modules/columnflow index 1b01b794..de74536b 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 1b01b7948c960474e1fff52c660d03960e6da83a +Subproject commit de74536b1038ce9ae445d108324a709848bbcf23 From e7f46e5773600556ed2caa5b4e7ee032728e3cfd Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 15 Jan 2025 11:15:22 +0100 Subject: [PATCH 20/20] Plot updates. 
---
 hbt/calibration/default.py | 63 +++++++++++++++++++++++---------------
 hbt/config/categories.py | 4 +--
 hbt/config/configs_hbt.py | 18 +++++++----
 hbt/config/styles.py | 39 +++++++++++------------
 hbt/config/variables.py | 4 ++-
 hbt/production/btag.py | 43 ++++++++++----------------
 modules/cmsdb | 2 +-
 modules/columnflow | 2 +-
 8 files changed, 95 insertions(+), 80 deletions(-)

diff --git a/hbt/calibration/default.py b/hbt/calibration/default.py
index c5c9c0bc..d4eb8ab7 100644
--- a/hbt/calibration/default.py
+++ b/hbt/calibration/default.py
@@ -71,30 +71,45 @@ def default_init(self: Calibrator) -> None:
     met_name = self.config_inst.x.met_name
     raw_met_name = self.config_inst.x.raw_met_name
 
-    # derive calibrators to add settings
-    self.jec_full_cls = jec.derive("jec_full", cls_dict={
-        "mc_only": True,
-        "nominal_only": True,
-        "met_name": met_name,
-        "raw_met_name": raw_met_name,
-    })
-    self.jec_nominal_cls = jec_nominal.derive("jec_nominal", cls_dict={
-        "met_name": met_name,
-        "raw_met_name": raw_met_name,
-    })
-
-    # version of jer that uses the first random number from deterministic_seeds
-    self.deterministic_jer_cls = jer.derive("deterministic_jer", cls_dict={
-        "deterministic_seed_index": 0,
-        "met_name": met_name,
-    })
-
-    # derive tec calibrators
-    self.tec_cls = tec.derive("tec", cls_dict={"met_name": met_name})
-    self.tec_nominal_cls = tec_nominal.derive("tec_nominal", cls_dict={"met_name": met_name})
-
-    # derive met_phi calibrator (currently only for run 2)
-    self.met_phi_cls = met_phi.derive("met_phi", cls_dict={"met_name": met_name})
+    # derive calibrators to add settings once
+    flag = f"custom_calibs_registered_{self.cls_name}"
+    if not self.config_inst.x(flag, False):
+        # jec calibrators
+        self.config_inst.x.calib_jec_full_cls = jec.derive("jec_full", cls_dict={
+            "mc_only": True,
+            "nominal_only": True,
+            "met_name": met_name,
+            "raw_met_name": raw_met_name,
+        })
+        self.config_inst.x.calib_jec_nominal_cls = jec_nominal.derive("jec_nominal", cls_dict={
+            "met_name": met_name,
+            "raw_met_name": raw_met_name,
+        })
+        # version of jer that uses the first random number from deterministic_seeds
+        self.config_inst.x.calib_deterministic_jer_cls = jer.derive("deterministic_jer", cls_dict={
+            "deterministic_seed_index": 0,
+            "met_name": met_name,
+        })
+        # derive tec calibrators
+        self.config_inst.x.calib_tec_cls = tec.derive("tec", cls_dict={
+            "met_name": met_name,
+        })
+        self.config_inst.x.calib_tec_nominal_cls = tec_nominal.derive("tec_nominal", cls_dict={
+            "met_name": met_name,
+        })
+        # derive met_phi calibrator (currently only used in run 2)
+        self.config_inst.x.calib_met_phi_cls = met_phi.derive("met_phi", cls_dict={
+            "met_name": met_name,
+        })
+        # change the flag
+        self.config_inst.set_aux(flag, True)
+
+    self.jec_full_cls = self.config_inst.x.calib_jec_full_cls
+    self.jec_nominal_cls = self.config_inst.x.calib_jec_nominal_cls
+    self.deterministic_jer_cls = self.config_inst.x.calib_deterministic_jer_cls
+    self.tec_cls = self.config_inst.x.calib_tec_cls
+    self.tec_nominal_cls = self.config_inst.x.calib_tec_nominal_cls
+    self.met_phi_cls = self.config_inst.x.calib_met_phi_cls
 
     # collect derived calibrators and add them to the calibrator uses and produces
     derived_calibrators = {
diff --git a/hbt/config/categories.py b/hbt/config/categories.py
index 0d8aa4ad..efd819da 100644
--- a/hbt/config/categories.py
+++ b/hbt/config/categories.py
@@ -58,11 +58,11 @@ def kwargs_fn(categories, add_qcd_group=True):
         # auxiliary information
         "aux": aux,
         # label
-        "label": [
+        "label": ", ".join([
             cat.label or cat.name
             for cat in categories.values()
             if cat.name != "os"  # os is the default
-        ] or None,
+        ]) or None,
     }
 
     # main analysis categories
diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
index 4132e8ab..37477e49 100644
--- a/hbt/config/configs_hbt.py
+++ b/hbt/config/configs_hbt.py
@@ -179,7 +179,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
         cfg.add_process(proc)
 
     # configure colors, labels, etc
-    from hbt.config.styles import stylize_processes, update_legend_labels
+    from hbt.config.styles import stylize_processes
     stylize_processes(cfg)
 
     ################################################################################################
@@ -441,7 +441,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
         "qcd",
         "st",
         "tt_multiboson",
-        "all_v",
+        "v",
+        "multiboson",
         "h",
         "ewk",
     ]),
@@ -531,14 +532,19 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
     cfg.x.default_selector_steps = "default"
 
     # plotting overwrites
+    from hbt.config.styles import legend_entries_per_column
+    cfg.x.default_general_settings = {
+        "cms_label": "wip",
+        "whitespace_fraction": 0.31,
+    }
     cfg.x.custom_style_config_groups = {
         "small_legend": {
             "legend_cfg": {
-                "ncols": 2, "fontsize": 16, "columnspacing": 0.6, "labelspacing": 0.275,
-                "update_handles_labels": update_legend_labels,
+                "ncols": 3, "borderpad": 0.7, "loc": "upper left", "fontsize": 16,
+                "columnspacing": 1.6, "labelspacing": 0.28,
+                "entries_per_column": legend_entries_per_column,
             },
-            "annotate_cfg": {"fontsize": 18},
-            "cms_label": "wip",
+            "annotate_cfg": {"fontsize": 16, "xycoords": "axes fraction", "xy": (0.035, 0.73), "style": "italic"},
         },
     }
     cfg.x.default_custom_style_config = "small_legend"
diff --git a/hbt/config/styles.py b/hbt/config/styles.py
index c1edfa7a..6b79221a 100644
--- a/hbt/config/styles.py
+++ b/hbt/config/styles.py
@@ -62,13 +62,13 @@ def stylize_processes(config: od.Config) -> None:
         p.color1 = cfg.x.colors.bright_blue
 
     if (p := config.get_process("vv", default=None)):
-        p.color1 = cfg.x.colors.aubergine
+        p.color1 = cfg.x.colors.yellow
 
     if (p := config.get_process("vvv", default=None)):
-        p.color1 = cfg.x.colors.aubergine
+        p.color1 = cfg.x.colors.yellow
 
     if (p := config.get_process("multiboson", default=None)):
-        p.color1 = cfg.x.colors.aubergine
+        p.color1 = cfg.x.colors.yellow
 
     if (p := config.get_process("w", default=None)):
         p.color1 = cfg.x.colors.aubergine
@@ -102,23 +102,24 @@ def stylize_processes(config: od.Config) -> None:
         p.color1 = cfg.x.colors.red
 
 
-def update_legend_labels(ax, handles, labels, n_cols):
+def legend_entries_per_column(ax, handles: list, labels: list, n_cols: int) -> list[int]:
     """
-    Fill empty handles so that all backgrounds are in the first column.
+    Control number of entries such that backgrounds are in the first n - 1 columns, and everything
+    else in the last one.
""" - # only implemented for 2 columns so far - if n_cols != 2: - return - # get number of entries per column + # get number of background and remaining entries n_backgrounds = sum(1 for handle in handles if handle.__class__.__name__ == "StepPatch") n_other = len(handles) - n_backgrounds - if n_backgrounds == n_other: - return - - # fill left or right column - n_add = abs(n_backgrounds - n_other) - pos = len(handles) if n_backgrounds > n_other else n_backgrounds - empty_handle = ax.plot([], label="", linestyle="None")[0] - for _ in range(n_add): - handles.insert(pos, empty_handle) - labels.insert(pos, "") + + # fill number of entries per column + entries_per_col = n_cols * [0] + n_bkg_cols = n_cols + # set last column if non-backgrounds are present + if n_other: + entries_per_col[-1] = n_other + n_bkg_cols -= 1 + # fill background columns + for i in range(n_bkg_cols): + entries_per_col[i] = n_backgrounds // n_bkg_cols + (n_backgrounds % n_bkg_cols > i) + + return entries_per_col diff --git a/hbt/config/variables.py b/hbt/config/variables.py index 681dced4..2f746224 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -298,7 +298,9 @@ def dilep_mass_test(events): config, name="dilep_mass", expression=dilep_mass_test, - aux={"inputs": ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"]}, + aux={ + "inputs": ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"], + }, binning=(40, 40, 120), unit="GeV", x_title=r"$m_{ll}$", diff --git a/hbt/production/btag.py b/hbt/production/btag.py index 168ddaa6..14edd9b8 100644 --- a/hbt/production/btag.py +++ b/hbt/production/btag.py @@ -47,29 +47,23 @@ def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.A if not weight_name.startswith(self.weight_name): continue - # BUG in prod3: some stats fields were missing so skip them for now - # # create a weight vectors starting with ones for both weight variations, i.e., - # # nomalization per pid and normalization per pid and jet multiplicity - # norm_weight_per_pid = np.ones(len(events), dtype=np.float32) - # norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32) - - # # fill weights with a new mask per unique process id (mostly just one) - # for pid in self.unique_process_ids: - # pid_mask = events.process_id == pid - # # single value - # norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] - # # lookup table - # n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1)) - # norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets] - - # # multiply with actual weight - # norm_weight_per_pid = norm_weight_per_pid * events[weight_name] - # norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name] - - # fake values - from columnflow.columnar_util import full_like - norm_weight_per_pid = full_like(events.event, 1.0, dtype=np.float32) - norm_weight_per_pid_njet = norm_weight_per_pid + # create a weight vectors starting with ones for both weight variations, i.e., + # nomalization per pid and normalization per pid and jet multiplicity + norm_weight_per_pid = np.ones(len(events), dtype=np.float32) + norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32) + + # fill weights with a new mask per unique process id (mostly just one) + for pid in self.unique_process_ids: + pid_mask = events.process_id == pid + # single value + norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] + # lookup table + n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1)) + norm_weight_per_pid_njet[pid_mask] = 
self.ratio_per_pid_njet[weight_name][pid][n_jets] + + # multiply with actual weight + norm_weight_per_pid = norm_weight_per_pid * events[weight_name] + norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name] # store them events = set_ak_column_f32(events, f"normalized_{weight_name}", norm_weight_per_pid) @@ -107,9 +101,6 @@ def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None: @_normalized_btag_weights.setup def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: - # BUG in prod3: some stats fields were missing so skip them for now - return - # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", diff --git a/modules/cmsdb b/modules/cmsdb index fcdb5d9c..b443d41d 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit fcdb5d9ca251b178a29ebdecfb1eb66802e9b1ec +Subproject commit b443d41df03e43c6a918f34c0600e617ea2d79d4 diff --git a/modules/columnflow b/modules/columnflow index de74536b..47bfb3d6 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit de74536b1038ce9ae445d108324a709848bbcf23 +Subproject commit 47bfb3d66a7114ec4c4b2130aee964cffa62096c
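
--
Editor's appendix: the short Python sketches below illustrate the logic touched by the
patches above. They are standalone illustrations with made-up inputs and simplified
stand-ins where noted, not part of any commit.

The lfn-base change in get_dataset_lfns prefers a local file system only when its config
section exists and its base path is actually present on disk, and falls back to the
remote (WLCG) target otherwise. A minimal sketch of that decision, with a plain dict
standing in for the law config and a hypothetical campaign name:

import os

def resolve_lfn_fs(config: dict, campaign: str) -> str:
    # default to the remote file system
    fs = f"wlcg_fs_{campaign}"
    # prefer the local file system if it is configured and its base directory exists
    local_fs = f"local_fs_{campaign}"
    if local_fs in config and os.path.exists(config[local_fs]["base"]):
        fs = local_fs
    return fs

# no local section configured -> the remote fs is chosen
assert resolve_lfn_fs({}, "2022preEE") == "wlcg_fs_2022preEE"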
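
The legend_entries_per_column helper that replaces update_legend_labels no longer pads
the handle list; it returns how many entries each legend column receives, so that
backgrounds fill the first n - 1 columns and everything else lands in the last one. The
arithmetic can be checked with plain integers, without matplotlib; the counts below are
invented:

def entries_per_column(n_backgrounds: int, n_other: int, n_cols: int) -> list[int]:
    entries = n_cols * [0]
    n_bkg_cols = n_cols
    # reserve the last column if non-backgrounds are present
    if n_other:
        entries[-1] = n_other
        n_bkg_cols -= 1
    # distribute backgrounds as evenly as possible, leftmost columns take the remainder
    for i in range(n_bkg_cols):
        entries[i] = n_backgrounds // n_bkg_cols + (n_backgrounds % n_bkg_cols > i)
    return entries

# 7 backgrounds plus 3 other entries in 3 columns -> backgrounds split 4/3
assert entries_per_column(7, 3, 3) == [4, 3, 3]
# without other entries, backgrounds spread over all columns
assert entries_per_column(7, 0, 3) == [3, 2, 2]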
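
In default_init, calibrator classes are now derived once per config and cached as
auxiliary data behind a flag, since the init hook may run many times while class
derivation should only happen once. A reduced sketch of the pattern; Config and derive
are simplified stand-ins for the order/columnflow objects, and the attribute names are
illustrative:

class Config:
    def __init__(self):
        self._aux = {}
    def x(self, key, default=None):
        return self._aux.get(key, default)
    def set_aux(self, key, value):
        self._aux[key] = value

def derive(name: str, cls_dict: dict) -> type:
    # stand-in for columnflow's derive: create a configured subclass
    return type(name, (object,), dict(cls_dict))

def init(config: Config) -> type:
    flag = "custom_calibs_registered"
    if not config.x(flag, False):
        config.set_aux("calib_tec_cls", derive("tec", {"met_name": "PuppiMET"}))
        config.set_aux(flag, True)
    # later calls reuse the cached class instead of deriving a new one
    return config.x("calib_tec_cls")

cfg = Config()
assert init(cfg) is init(cfg)  # repeated inits yield the same derived class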
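
The block re-enabled in _normalized_btag_weights rescales the btag weight per process id
by a precomputed ratio (and, in the second variant, by a ratio additionally binned in jet
multiplicity). A numpy sketch of the per-process part; process ids, weights, and ratios
are all invented:

import numpy as np

process_id = np.array([1, 1, 2, 2, 2])
btag_weight = np.array([0.9, 1.1, 0.8, 1.2, 1.0])
ratio_per_pid = {1: 1.05, 2: 0.95}  # hypothetical normalization factors

# start from ones, then fill one scalar ratio per process id
norm_weight = np.ones(len(btag_weight), dtype=np.float32)
for pid, ratio in ratio_per_pid.items():
    norm_weight[process_id == pid] = ratio

# multiply with the actual weight
norm_weight = norm_weight * btag_weight
print(norm_weight)  # roughly [0.945 1.155 0.76 1.14 0.95]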