From b6aabeab79529eb551e68b5d61c220ef8b13e5c0 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 8 Jan 2025 11:31:09 +0100 Subject: [PATCH 01/20] Update subjet handling. --- hbt/config/configs_hbt.py | 5 ++--- hbt/selection/jet.py | 28 +++++++++++----------------- modules/cmsdb | 2 +- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 7806ef79..b909b544 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -1206,9 +1206,8 @@ def add_external(name, value): ################################################################################################ # channels - # TODO: switch etau and mutau, also check if change needed in res_dnn's - cfg.add_channel(name="mutau", id=1) - cfg.add_channel(name="etau", id=2) + cfg.add_channel(name="etau", id=1) + cfg.add_channel(name="mutau", id=2) cfg.add_channel(name="tautau", id=3) cfg.add_channel(name="mumu", id=4) cfg.add_channel(name="emu", id=5) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index afefe759..4d17ac65 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -183,13 +183,15 @@ def jet_selection( # trigger leg matching for tautau events that were only triggered by a tau-tau-jet cross trigger # with two different strategies (still under scrutiny): # a) select the two highest scoring hhbjets of which one must match the jet leg - # (folds matching into the hhbjet identification itself) + # (folds matching decisions into the hhbjet identification itself) # b) _after_ selecting the two hhbjets, at least one of them must match the jet leg # (does the hhbjet identification first, and then filters using the matching) + + # create the mask select those events false_mask = ak.full_like(events.event, False, dtype=bool) ttj_mask = ( (events.channel_id == 3) & - ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttc], false_mask), axis=1) & + ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.op], false_mask), axis=1) & ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttjc], false_mask), axis=1) ) @@ -237,7 +239,7 @@ def jet_selection( flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) # validate that either none or two hhbjets were identified - assert len(set(ak.sum(hhbjet_mask, axis=1)) - {0, 2}) == 0 + assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) # # fat jets @@ -263,14 +265,10 @@ def jet_selection( axis=2, ) - # discard the event in case the (first) fatjet with matching subjets is found - # but they are not b-tagged (TODO: move to deepjet when available for subjets) - # TODO: is it correct to do this? for run 3 the pnet wp is compare against btagDeepB? - if self.config_inst.campaign.x.run == 3: - wp = self.config_inst.x.btag_working_points.particleNet.loose - else: - wp = self.config_inst.x.btag_working_points.deepcsv.loose - subjets_btagged = ak.all(events.SubJet[ak.firsts(subjet_indices)].btagDeepB > wp, axis=1) + # check subjet btags (only deepcsv available) + # note: skipped for now as we do not have a final strategy for run 3 yet + # wp = ... + # subjets_btagged = ak.all(events.SubJet[ak.firsts(subjet_indices)].btagDeepB > wp, axis=1) # # vbf jets @@ -343,12 +341,8 @@ def jet_selection( ascending=False, ) - # final event selection - jet_sel = ( - (ak.sum(default_mask, axis=1) >= 2) & - # TODO: do want this? 
-        ak.fill_none(subjets_btagged, True)  # was none for events with no matched fatjet
-    )
+    # final event selection (only looking at the number of default jets for now)
+    jet_sel = ak.sum(default_mask, axis=1) >= 2
 
     # some final type conversions
     jet_indices = ak.values_astype(ak.fill_none(jet_indices, 0), np.int32)
diff --git a/modules/cmsdb b/modules/cmsdb
index f324901a..54953603 160000
--- a/modules/cmsdb
+++ b/modules/cmsdb
@@ -1 +1 @@
-Subproject commit f324901a22acce34c1f8948dd60704b641716e93
+Subproject commit 549536034fdd0b20efbddc1adf4403a3713161ab

From 2484fc52b45a5107b6f19d48b282486d1bc4a9a7 Mon Sep 17 00:00:00 2001
From: Ana A
Date: Wed, 8 Jan 2025 16:45:26 +0100
Subject: [PATCH 02/20] removing method a) from hhbtag matching

---
 hbt/selection/jet.py | 72 ++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py
index 4d17ac65..ab3da841 100644
--- a/hbt/selection/jet.py
+++ b/hbt/selection/jet.py
@@ -180,18 +180,15 @@ def jet_selection(
     # deselect jets in events with less than two valid scores
     hhbjet_mask = hhbjet_mask & (ak.sum(hhbtag_scores != EMPTY_FLOAT, axis=1) >= 2)
 
-    # trigger leg matching for tautau events that were only triggered by a tau-tau-jet cross trigger
-    # with two different strategies (still under scrutiny):
-    # a) select the two highest scoring hhbjets of which one must match the jet leg
-    #    (folds matching decisions into the hhbjet identification itself)
-    # b) _after_ selecting the two hhbjets, at least one of them must match the jet leg
-    #    (does the hhbjet identification first, and then filters using the matching)
-
-    # create the mask select those events
+    # trigger leg matching for tautau events that were only triggered by a tau-tau-jet cross trigger;
+    # two strategies, a) and b), were studied, but strategy a) appears not to comply with how trigger
+    # matching should be done and is therefore ignored.
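+    # (reasoning, in brief: folding the leg matching into the object choice, as in strategy a),
+    # can promote a lower-scoring jet into the hhbjet pair just because it matched the trigger
+    # leg, biasing the hhbjet identification itself; strategy b) identifies the pair first and
+    # only uses the matching to accept or reject whole events)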
+
+    # create a mask to select tautau events that were only triggered by a tau-tau-jet cross trigger
     false_mask = ak.full_like(events.event, False, dtype=bool)
     ttj_mask = (
         (events.channel_id == 3) &
-        ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.op], false_mask), axis=1) &
+        ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttc], false_mask), axis=1) &
         ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttjc], false_mask), axis=1)
     )
 
@@ -209,35 +206,52 @@ def jet_selection(
     # constrain to jets with a score
     matching_mask = matching_mask & (hhbjet_mask[ttj_mask] != EMPTY_FLOAT)
 
+    #
     # a)
-    # sort matching masks by score first
-    sel_score_indices = score_indices[ttj_mask]
-    sorted_matching_mask = matching_mask[sel_score_indices]
-    # get the position of the highest scoring _and_ matched hhbjet
-    # (this hhbet is guaranteed to be selected)
-    sel_li = ak.local_index(sorted_matching_mask)
-    matched_idx = ak.firsts(sel_li[sorted_matching_mask], axis=1)
-    # the other hhbjet is not required to be matched and is either at the 0th or 1st position
-    # (depending on whether the matched one had the highest score)
-    other_idx = ak.where(matched_idx == 0, 1, 0)
-    # use comparisons between selected indices and the local index to convert back into a mask
-    # and check again that both hhbjets have a score
-    sel_hhbjet_mask = (
-        (sel_li == ak.fill_none(sel_score_indices[matched_idx[..., None]][..., 0], -1)) |
-        (sel_li == ak.fill_none(sel_score_indices[other_idx[..., None]][..., 0], -1))
-    ) & (hhbjet_mask[ttj_mask] != EMPTY_FLOAT)
-
+    # two hhb-tagged jets must be selected. The highest scoring jet is always selected.
+    #    - If this jet happens to match the trigger leg, then the second highest scoring jet is also selected.
+    #    - If this is not the case, then the highest scoring jet that matches the trigger leg is selected.
+    # ! Note: apparently the official recommendation is that trigger matching should only be used
+    # to select full events and not for individual object selection. Thus, this strategy introduces a bias.
+    #
+
+    # # sort matching masks by score first
+    # sel_score_indices = score_indices[ttj_mask]
+    # sorted_matching_mask = matching_mask[sel_score_indices]
+    # # get the position of the highest scoring _and_ matched hhbjet
+    # # (this hhbjet is guaranteed to be selected)
+    # sel_li = ak.local_index(sorted_matching_mask)
+    # matched_idx = ak.firsts(sel_li[sorted_matching_mask], axis=1)
+    # # the other hhbjet is not required to be matched and is either at the 0th or 1st position
+    # # (depending on whether the matched one had the highest score)
+    # other_idx = ak.where(matched_idx == 0, 1, 0)
+    # # use comparisons between selected indices and the local index to convert back into a mask
+    # # and check again that both hhbjets have a score
+    # sel_hhbjet_mask = (
+    #     (sel_li == ak.fill_none(sel_score_indices[matched_idx[..., None]][..., 0], -1)) |
+    #     (sel_li == ak.fill_none(sel_score_indices[other_idx[..., None]][..., 0], -1))
+    # ) & (hhbjet_mask[ttj_mask] != EMPTY_FLOAT)
+
+    #
+    # b)
+    # two hhb-tagged jets must be selected. The highest and second-highest scoring jets are selected.
+    #    - If any of those matches the trigger leg, the event is accepted.
+    #    - If none of them matches the trigger leg, the event is rejected.
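+    # illustrative sketch of strategy b) with toy values (hypothetical masks for two events):
+    #   hhbjet_mask[ttj_mask]                            -> [[True, True, False], [True, True]]
+    #   matching_mask                                    -> [[False, True, False], [False, False]]
+    #   one_matched = ak.any(matching_mask[...], axis=1) -> [True, False]
+    #   sel_hhbjet_mask & one_matched                    -> [[True, True, False], [False, False]]
+    # i.e. the first event keeps both hhbjets, while the second event retains none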
+ # + # check if any of the two jets is matched and fold back into hhbjet_mask (brodcasted) - # sel_hhbjet_mask = ak.Array(hhbjet_mask[ttj_mask]) - # one_matched = ak.any(matching_mask[sel_hhbjet_mask], axis=1) - # sel_hhbjet_mask = sel_hhbjet_mask & one_matched + sel_hhbjet_mask = ak.Array(hhbjet_mask[ttj_mask]) + one_matched = ak.any(matching_mask[sel_hhbjet_mask], axis=1) + sel_hhbjet_mask = sel_hhbjet_mask & one_matched # insert back into the full hhbjet_mask flat_hhbjet_mask = flat_np_view(hhbjet_mask) flat_jet_mask = ak.flatten(ak.full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) + from IPython import embed; + embed() + # validate that either none or two hhbjets were identified assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) From e5cec17ab5de7183ac6b4ae12e59b31c99ccd1c8 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 8 Jan 2025 18:55:26 +0100 Subject: [PATCH 03/20] Fix lepton / trigger matching. --- hbt/categorization/default.py | 5 + hbt/config/categories.py | 9 +- hbt/config/configs_hbt.py | 11 +- hbt/config/triggers.py | 98 ++--- hbt/selection/jet.py | 7 +- hbt/selection/lepton.py | 773 ++++++++++++++++++---------------- 6 files changed, 486 insertions(+), 417 deletions(-) diff --git a/hbt/categorization/default.py b/hbt/categorization/default.py index c86e40a4..17391df5 100644 --- a/hbt/categorization/default.py +++ b/hbt/categorization/default.py @@ -30,6 +30,11 @@ def cat_tautau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, return events, events.channel_id == self.config_inst.channels.n.tautau.id +@categorizer(uses={"channel_id"}) +def cat_ee(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ee.id + + @categorizer(uses={"channel_id"}) def cat_mumu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: return events, events.channel_id == self.config_inst.channels.n.mumu.id diff --git a/hbt/config/categories.py b/hbt/config/categories.py index dae03186..b86371f0 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -19,8 +19,9 @@ def add_categories(config: od.Config) -> None: add_category(config, name="etau", id=1, selection="cat_etau", label=r"$e\tau_{h}$") add_category(config, name="mutau", id=2, selection="cat_mutau", label=r"$\mu\tau_{h}$") add_category(config, name="tautau", id=3, selection="cat_tautau", label=r"$\tau_{h}\tau_{h}$") - add_category(config, name="mumu", id=4, selection="cat_mumu", label=r"$\mu\mu$") - add_category(config, name="emu", id=5, selection="cat_emu", label=r"$e\mu$") + add_category(config, name="ee", id=4, selection="cat_ee", label=r"$ee$") + add_category(config, name="mumu", id=5, selection="cat_mumu", label=r"$\mu\mu$") + add_category(config, name="emu", id=6, selection="cat_emu", label=r"$e\mu$") # qcd regions add_category(config, name="os", id=10, selection="cat_os", label="Opposite sign", tags={"os"}) @@ -75,7 +76,9 @@ def kwargs_fn(categories, add_qcd_group=True): # control categories control_categories = { # channels first - "channel": [config.get_category("mumu"), config.get_category("emu")], + "channel": [ + config.get_category("ee"), config.get_category("mumu"), config.get_category("emu"), + ], # kinematic regions in the middle (to be extended) "kin": [config.get_category("incl"), config.get_category("2j")], # relative sign last diff --git a/hbt/config/configs_hbt.py 
b/hbt/config/configs_hbt.py index b909b544..fadd094b 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -342,7 +342,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # add tags to datasets if dataset.name.startswith("data_e_"): - dataset.add_tag({"etau", "emu_from_e"}) + dataset.add_tag({"etau", "emu_from_e", "ee"}) if dataset.name.startswith("data_mu_"): dataset.add_tag({"mutau", "emu_from_mu", "mumu"}) if dataset.name.startswith("data_tau_"): @@ -428,8 +428,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] "ewk", ]), "dy_split": [ - "dy_m4to10", "dy_m10to50", "dy_m50toinf", - "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", + # "dy_m4to10", "dy_m10to50", "dy_m50toinf", + # "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", "dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", "dy_m50toinf_1j_pt200to400", "dy_m50toinf_1j_pt400to600", "dy_m50toinf_1j_pt600toinf", "dy_m50toinf_2j_pt40to100", "dy_m50toinf_2j_pt100to200", "dy_m50toinf_2j_pt200to400", @@ -1209,8 +1209,9 @@ def add_external(name, value): cfg.add_channel(name="etau", id=1) cfg.add_channel(name="mutau", id=2) cfg.add_channel(name="tautau", id=3) - cfg.add_channel(name="mumu", id=4) - cfg.add_channel(name="emu", id=5) + cfg.add_channel(name="ee", id=4) + cfg.add_channel(name="mumu", id=5) + cfg.add_channel(name="emu", id=6) # add categories from hbt.config.categories import add_categories diff --git a/hbt/config/triggers.py b/hbt/config/triggers.py index 0e482ceb..0fc77340 100644 --- a/hbt/config/triggers.py +++ b/hbt/config/triggers.py @@ -258,7 +258,7 @@ def add_triggers_2016(config: od.Config) -> None: # does not exist for run F on but should only be used until run 276215 -> which era? # TODO: to be checked applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era <= "E"), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), Trigger( name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20", @@ -282,7 +282,7 @@ def add_triggers_2016(config: od.Config) -> None: # does not exist for run F on but should only be used between run 276215 and 278270 -> which eras? # TODO: to be checked applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era <= "E"), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), Trigger( name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau30", @@ -306,7 +306,7 @@ def add_triggers_2016(config: od.Config) -> None: # does not exist until run E but should only be used after run 278270 -> which era? 
# TODO: to be checked applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era >= "E"), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -331,7 +331,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), Trigger( name="HLT_IsoMu19_eta2p1_LooseIsoPFTau20_SingleL1", @@ -352,7 +352,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -378,7 +378,7 @@ def add_triggers_2016(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or ("B" <= dataset_inst.x.era <= "F")), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleMediumCombinedIsoPFTau35_Trk1_eta2p1_Reg", @@ -400,7 +400,7 @@ def add_triggers_2016(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "H"), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -425,7 +425,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ) # # single muon @@ -442,7 +442,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) config.x.triggers.add( name="HLT_IsoMu22_eta2p1", @@ -456,7 +456,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) config.x.triggers.add( name="HLT_IsoTkMu22", @@ -470,7 +470,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) config.x.triggers.add( name="HLT_IsoTkMu22_eta2p1", @@ -484,7 +484,7 @@ def add_triggers_2016(config: od.Config) -> None: trigger_bits=None, # TODO ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ) @@ -510,7 +510,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), Trigger( name="HLT_Ele32_WPTight_Gsf_L1DoubleEG", @@ -525,7 +525,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2 + 1024, ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), Trigger( name="HLT_Ele35_WPTight_Gsf", @@ -539,7 +539,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -557,7 +557,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), Trigger( name="HLT_IsoMu27", @@ -571,7 +571,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=2, ), ), - 
tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -598,7 +598,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=1024 + 256, ), ), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -625,7 +625,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=1024 + 512, ), ), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -650,7 +650,7 @@ def add_triggers_2017(config: od.Config) -> None: trigger_bits=64, ), ), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg", @@ -672,7 +672,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", @@ -694,7 +694,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg", @@ -716,7 +716,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -756,7 +756,7 @@ def add_triggers_2017(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), ]) @@ -779,7 +779,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), Trigger( name="HLT_Ele35_WPTight_Gsf", @@ -793,7 +793,7 @@ def add_triggers_2018(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -811,7 +811,7 @@ def add_triggers_2018(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), Trigger( name="HLT_IsoMu27", @@ -825,7 +825,7 @@ def add_triggers_2018(config: od.Config) -> None: trigger_bits=2, ), ), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -854,7 +854,7 @@ def add_triggers_2018(config: od.Config) -> None: ), # the non-HPS path existed only for data and is fully covered in MC below applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -883,7 +883,7 @@ def add_triggers_2018(config: od.Config) -> None: ), # the non-HPS path existed only for data and is fully covered in MC below applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -909,7 +909,7 @@ def 
add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", @@ -931,7 +931,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), Trigger( name="HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg", @@ -953,7 +953,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -992,7 +992,7 @@ def add_triggers_2018(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), ]) @@ -1030,10 +1030,11 @@ def add_triggers_2022(config: od.Config) -> None: applies_to_dataset=(lambda dataset_inst: ( dataset_inst.is_mc or dataset_inst.has_tag("etau") or + dataset_inst.has_tag("ee") or dataset_inst.has_tag("emu_from_e") or dataset_inst.has_tag("emu_from_mu") )), - tags={"single_trigger", "single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -1061,7 +1062,7 @@ def add_triggers_2022(config: od.Config) -> None: dataset_inst.has_tag("emu_from_mu") or dataset_inst.has_tag("mumu") )), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -1095,7 +1096,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("etau")), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -1130,7 +1131,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("mutau")), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -1164,7 +1165,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -1220,7 +1221,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), # Currently disabled since it may not be needed @@ -1278,7 +1279,7 @@ def add_triggers_2022(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_jet"}, ), ]) @@ -1314,10 +1315,11 @@ def add_triggers_2023(config: od.Config) -> None: applies_to_dataset=(lambda dataset_inst: ( dataset_inst.is_mc or dataset_inst.has_tag("etau") or + dataset_inst.has_tag("ee") or dataset_inst.has_tag("emu_from_e") or dataset_inst.has_tag("emu_from_mu") )), - tags={"single_trigger", 
"single_e", "channel_e_tau"}, + tags={"single_trigger", "single_e"}, ), # @@ -1345,7 +1347,7 @@ def add_triggers_2023(config: od.Config) -> None: dataset_inst.has_tag("emu_from_mu") or dataset_inst.has_tag("mumu") )), - tags={"single_trigger", "single_mu", "channel_mu_tau"}, + tags={"single_trigger", "single_mu"}, ), # @@ -1379,7 +1381,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("etau")), - tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, + tags={"cross_trigger", "cross_e_tau"}, ), # @@ -1414,7 +1416,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("mutau")), - tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, + tags={"cross_trigger", "cross_mu_tau"}, ), # @@ -1448,7 +1450,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau"}, ), # @@ -1502,7 +1504,7 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_vbf"}, ), # @@ -1541,6 +1543,6 @@ def add_triggers_2023(config: od.Config) -> None: ), ), applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, + tags={"cross_trigger", "cross_tau_tau_jet"}, ), ]) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index ab3da841..a0afcca8 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -249,9 +249,6 @@ def jet_selection( flat_jet_mask = ak.flatten(ak.full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) - from IPython import embed; - embed() - # validate that either none or two hhbjets were identified assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) @@ -424,9 +421,9 @@ def jet_selection_setup(self: Selector, reqs: dict, inputs: dict, reader_targets # store ids of tau-tau-jet and tau-tau cross triggers self.trigger_ids_ttc = [ trigger.id for trigger in self.config_inst.x.triggers - if trigger.has_tag("channel_tau_tau") and not trigger.has_tag("cross_tau_tau_jet") + if trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf"}) ] self.trigger_ids_ttjc = [ trigger.id for trigger in self.config_inst.x.triggers - if trigger.has_tag("channel_tau_tau") and trigger.has_tag("cross_tau_tau_jet") + if trigger.has_tag("cross_tau_tau_jet") ] diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index d7b14823..6c04ec7b 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -12,7 +12,7 @@ from functools import reduce from columnflow.selection import Selector, SelectionResult, selector -from columnflow.columnar_util import set_ak_column +from columnflow.columnar_util import set_ak_column, sorted_indices_from_mask from columnflow.util import maybe_import from hbt.util import IF_NANO_V9, IF_NANO_GE_V10 @@ -70,9 +70,8 @@ def update_channel_ids( @selector( uses={ "Electron.{pt,eta,phi,dxy,dz,pfRelIso03_all}", - IF_NANO_V9("Electron.mvaFall17V2{Iso_WP80,Iso_WP90,noIso_WP90}"), - 
IF_NANO_GE_V10("Electron.{mvaIso_WP80,mvaIso_WP90,mvaNoIso_WP90}"), - "TrigObj.{pt,eta,phi}", + IF_NANO_V9("Electron.mvaFall17V2{Iso_WP80,Iso_WP90}"), + IF_NANO_GE_V10("Electron.{mvaIso_WP80,mvaIso_WP90}"), }, exposed=False, ) @@ -80,36 +79,15 @@ def electron_selection( self: Selector, events: ak.Array, trigger: Trigger, - leg_masks: dict[str, ak.Array], - trigger_fire_list: list[bool] | None = None, **kwargs, -) -> tuple[ak.Array, ak.Array]: +) -> tuple[ak.Array | None, ak.Array]: """ - Electron selection returning two sets of indices for default and veto electrons. + Electron selection returning two sets of masks for default and veto electrons. See https://twiki.cern.ch/twiki/bin/view/CMS/EgammaNanoAOD?rev=4 - If the trigger_fire_list is given, all events that are not fired by the trigger lose their trigger - matching, i.e. the trigger object matching is set to True. This is useful for the emu channel. """ + is_2016 = self.config_inst.campaign.x.year == 2016 is_single = trigger.has_tag("single_e") is_cross = trigger.has_tag("cross_e_tau") - is_2016 = self.config_inst.campaign.x.year == 2016 - - # start per-electron mask with trigger object matching - if is_single: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 1 - assert abs(trigger.legs["e"].pdg_id) == 11 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) - elif is_cross: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["e"].pdg_id) == 11 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) - - # pt sorted indices for converting masks to indices - sorted_indices = ak.argsort(events.Electron.pt, axis=-1, ascending=False) # obtain mva flags, which might be located at different routes, depending on the nano version if "mvaIso_WP80" in events.Electron.fields: @@ -118,124 +96,92 @@ def electron_selection( # check this in original root files if necessary mva_iso_wp80 = events.Electron.mvaIso_WP80 mva_iso_wp90 = events.Electron.mvaIso_WP90 - # mva_noniso_wp90 = events.Electron.mvaNoIso_WP90 else: # <= nano v9 mva_iso_wp80 = events.Electron.mvaFall17V2Iso_WP80 mva_iso_wp90 = events.Electron.mvaFall17V2Iso_WP90 - # mva_noniso_wp90 = events.Electron.mvaFall17V2noIso_WP90 - # default electron mask, only required for single and cross triggers with electron leg + # default electron mask default_mask = None - default_indices = None - - # add true to the leg mask if the trigger is not fired for the emu channel only case, - # where trigger_fire_list should be given - if trigger_fire_list is not None: - matches_leg0 = ak.where(trigger_fire_list, events.Electron.pt > -1, events.Electron.pt < -1) - if is_single or is_cross or (trigger_fire_list is not None): + if is_single or is_cross: min_pt = 26.0 if is_2016 else (31.0 if is_single else 25.0) + max_eta = 2.5 if is_single else 2.1 default_mask = ( (mva_iso_wp80 == 1) & - (abs(events.Electron.eta) < 2.5) if is_single else (abs(events.Electron.eta) < 2.1) & + (abs(events.Electron.eta) < max_eta) & (abs(events.Electron.dxy) < 0.045) & (abs(events.Electron.dz) < 0.2) & - (events.Electron.pt > min_pt) & - matches_leg0 + (events.Electron.pt > min_pt) ) - # convert to sorted indices - default_indices = sorted_indices[default_mask[sorted_indices]] - default_indices = ak.values_astype(default_indices, np.int32) - # veto electron mask + # veto electron mask (must be trigger independent!) 
veto_mask = ( - ( - (mva_iso_wp90 == 1) | - False - # TODO: do we need to keep this? - # disabled as part of the resonant synchronization effort - # ((mva_noniso_wp90 == 1) & (events.Electron.pfRelIso03_all < 0.3)) - ) & + (mva_iso_wp90 == 1) & (abs(events.Electron.eta) < 2.5) & (abs(events.Electron.dxy) < 0.045) & (abs(events.Electron.dz) < 0.2) & (events.Electron.pt > 10.0) ) - # convert to sorted indices - veto_indices = sorted_indices[veto_mask[sorted_indices]] - veto_indices = ak.values_astype(veto_indices, np.int32) - return default_indices, veto_indices + return default_mask, veto_mask @selector( - uses={ - # nano columns - "Muon.{pt,eta,phi,mediumId,tightId,pfRelIso04_all,dxy,dz}", - "TrigObj.{pt,eta,phi}", - }, + uses={"{Electron,TrigObj}.{pt,eta,phi}"}, exposed=False, ) -def muon_selection( +def electron_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, leg_masks: dict[str, ak.Array], - select_without_trigger: bool = False, - mumu_selection: bool = False, **kwargs, -) -> tuple[ak.Array, ak.Array]: +) -> tuple[ak.Array]: """ - Muon selection returning two sets of indidces for default and veto muons. + Electron trigger matching. + """ + # TODO: only perform this for events where the trigger fired, needs more info and flat_views + is_single = trigger.has_tag("single_e") + is_cross = trigger.has_tag("cross_e_tau") + + # catch config errors + assert is_single or is_cross + assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) + assert abs(trigger.legs["e"].pdg_id) == 11 + + return trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) + + +@selector( + uses={"Muon.{pt,eta,phi,mediumId,tightId,pfRelIso04_all,dxy,dz}"}, + exposed=False, +) +def muon_selection( + self: Selector, + events: ak.Array, + trigger: Trigger, + **kwargs, +) -> tuple[ak.Array | None, ak.Array]: + """ + Muon selection returning two sets of masks for default and veto muons. References: - Isolation working point: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2?rev=59 - ID und ISO : https://twiki.cern.ch/twiki/bin/view/CMS/MuonUL2017?rev=15 - - If mumu_selection is set to True, a second muon is selected and the corresponding indices are additionally returned. """ + is_2016 = self.config_inst.campaign.x.year == 2016 is_single = trigger.has_tag("single_mu") is_cross = trigger.has_tag("cross_mu_tau") - is_2016 = self.config_inst.campaign.x.year == 2016 - - if (is_single or is_cross or mumu_selection) and select_without_trigger: - raise ValueError("select_without_trigger can only be used for non-triggered muon selections") - if is_cross and mumu_selection: - raise ValueError("mumu_selection can only be used for single muon selections") - - # start per-muon mask with trigger object matching - if is_single: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 1 - assert abs(trigger.legs["mu"].pdg_id) == 13 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) - elif is_cross: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["mu"].pdg_id) == 13 - # match leg 0 - matches_leg0 = trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) - elif select_without_trigger: - matches_leg0 = events.Muon.pt > -1 - - # pt sorted indices for converting masks to indices - # TODO: Why not local index? 
they should be sorted anyway so yes the results should be the same - # but if there is a problem with the sorting, we are selecting the wrong muons, since the - # masks below are created with the default sorting - sorted_indices = ak.argsort(events.Muon.pt, axis=-1, ascending=False) - - # default muon mask, only required for single and cross triggers with muon leg + # default muon mask default_mask = None - default_indices = None - if is_single or is_cross or select_without_trigger: + if is_single or is_cross: if is_2016: min_pt = 23.0 if is_single else 20.0 else: min_pt = 26.0 if is_single else 22.0 - default_mask_wo_trigger = ( + default_mask = ( (events.Muon.tightId == 1) & (abs(events.Muon.eta) < 2.4) & (abs(events.Muon.dxy) < 0.045) & @@ -244,43 +190,7 @@ def muon_selection( (events.Muon.pt > min_pt) ) - if mumu_selection: - # for mumu selection, the matched muon should the one with the highest pt among the two - # otherwise selected muons. We check the number of muons only afterwards but should still - # apply the trigger matching to only selected muons - - # first mask the array with the default mask without trigger, the non-matched muon will - # become None and will return False in the trigger matching and be put at the end of the - # list when sorted by pt - masked_muons = ak.mask(events.Muon, default_mask_wo_trigger) - - # then sort the selected muons by pt to push the non-selected ones to the end - sorted_masked_muons = masked_muons[ak.argsort(masked_muons.pt, ascending=False)] - - # apply trigger matching to the first muon of the selected muons - matches_first_selected_muons = ak.where( - ak.local_index(sorted_masked_muons) == 0, - trigger_object_matching(sorted_masked_muons, events.TrigObj[leg_masks["mu"]]), - False, - ) - - # bring the muons back in the original position in the array - matches_leg0 = matches_first_selected_muons[ak.argsort(ak.argsort(masked_muons.pt, ascending=False))] - - # for mumu selection, the second muon should not be the same as the first one - matches_second_muon = ~matches_leg0 - default_mask_second_muon = default_mask_wo_trigger & matches_second_muon - - default_mask = default_mask_wo_trigger & matches_leg0 - - # convert to sorted indices - default_indices = sorted_indices[default_mask[sorted_indices]] - default_indices = ak.values_astype(default_indices, np.int32) - if mumu_selection: - default_indices_second_muon = sorted_indices[default_mask_second_muon[sorted_indices]] - default_indices_second_muon = ak.values_astype(default_indices_second_muon, np.int32) - - # veto muon mask + # veto muon mask (must be trigger independent!) veto_mask = ( ((events.Muon.mediumId == 1) | (events.Muon.tightId == 1)) & (abs(events.Muon.eta) < 2.4) & @@ -289,14 +199,34 @@ def muon_selection( (events.Muon.pfRelIso04_all < 0.3) & (events.Muon.pt > 10) ) - # convert to sorted indices - veto_indices = sorted_indices[veto_mask[sorted_indices]] - veto_indices = ak.values_astype(veto_indices, np.int32) - if mumu_selection: - return default_indices, default_indices_second_muon, veto_indices - else: - return default_indices, veto_indices + return default_mask, veto_mask + + +@selector( + uses={"{Muon,TrigObj}.{pt,eta,phi}"}, + exposed=False, +) +def muon_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Muon trigger matching. 
+ """ + # TODO: only perform this for events where the trigger fired, needs more info and flat_views + is_single = trigger.has_tag("single_mu") + is_cross = trigger.has_tag("cross_mu_tau") + + # catch config errors + assert is_single or is_cross + assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) + assert abs(trigger.legs["mu"].pdg_id) == 13 + + return trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) @selector( @@ -311,22 +241,20 @@ def tau_selection( self: Selector, events: ak.Array, trigger: Trigger, - leg_masks: dict[str, ak.Array], - electron_indices: ak.Array, - muon_indices: ak.Array, + electron_mask: ak.Array | None, + muon_mask: ak.Array | None, **kwargs, ) -> tuple[ak.Array, ak.Array]: """ - Tau selection returning a set of indices for taus that are at least VVLoose isolated (vs jet) - and a second mask to select the action Medium isolated ones, eventually to separate normal and - iso inverted taus for QCD estimations. + Tau selection returning a masks for taus that are at least VVLoose isolated (vs jet) + and a second mask to select isolated ones, eventually to separate normal and iso inverted taus + for QCD estimations. """ # return empty mask if no tagged taus exists in the chunk if ak.all(ak.num(events.Tau) == 0): logger.info("no taus found in event chunk") - # convenient definition of empty indices and iso mask - empty_indices = ak.local_index(events.Tau) - return empty_indices, empty_indices == 0 + false_mask = ak.full_like(events.Tau.pt, False, dtype=bool) + return false_mask, false_mask is_single_e = trigger.has_tag("single_e") is_single_mu = trigger.has_tag("single_mu") @@ -335,29 +263,11 @@ def tau_selection( is_cross_tau = trigger.has_tag("cross_tau_tau") is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") - is_any_cross_tau = is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet is_2016 = self.config_inst.campaign.x.year == 2016 is_run3 = self.config_inst.campaign.x.run == 3 get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" - wp_config = self.config_inst.x.tau_id_working_points - # start per-tau mask with trigger object matching per leg - if is_cross_e or is_cross_mu: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["tau"].pdg_id) == 15 - # match leg 1 - matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau"]]) - elif is_any_cross_tau: - # catch config errors - assert trigger.n_legs == len(leg_masks) >= 2 - assert abs(trigger.legs["tau1"].pdg_id) == 15 - assert abs(trigger.legs["tau2"].pdg_id) == 15 - # match both legs - matches_leg0 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau1"]]) - matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau2"]]) - # determine minimum pt and maximum eta max_eta = 2.5 if is_single_e or is_single_mu: @@ -375,59 +285,83 @@ def tau_selection( elif is_cross_tau_jet: min_pt = None if not is_run3 else 35.0 - # select which decay modes to consider - decay_mode_mask = reduce(or_, - [ - events.Tau.decayMode == 0, - events.Tau.decayMode == 1, - events.Tau.decayMode == 10, - events.Tau.decayMode == 11, - ], - ) - # base tau mask for default and qcd sideband tau base_mask = ( (abs(events.Tau.eta) < max_eta) & (events.Tau.pt > min_pt) & (abs(events.Tau.dz) < 0.2) & - decay_mode_mask & + reduce(or_, [events.Tau.decayMode == mode for mode in (0, 1, 10, 11)]) & (events.Tau[get_tau_tagger("e")] >= 
wp_config.tau_vs_e.vloose) & (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) & (events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.vvvloose) ) # remove taus with too close spatial separation to previously selected leptons - if electron_indices is not None: - base_mask = base_mask & ak.all(events.Tau.metric_table(events.Electron[electron_indices]) > 0.5, axis=2) - if muon_indices is not None: - base_mask = base_mask & ak.all(events.Tau.metric_table(events.Muon[muon_indices]) > 0.5, axis=2) + if electron_mask is not None: + base_mask = base_mask & ak.all(events.Tau.metric_table(events.Electron[electron_mask]) > 0.5, axis=2) + if muon_mask is not None: + base_mask = base_mask & ak.all(events.Tau.metric_table(events.Muon[muon_mask]) > 0.5, axis=2) - # add trigger object masks - if is_cross_e or is_cross_mu: - base_mask = base_mask & matches_leg1 - elif is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet: - # taus need to be matched to at least one leg, but as a side condition - # each leg has to have at least one match to a tau - base_mask = base_mask & ( - (matches_leg0 | matches_leg1) & - ak.any(matches_leg0, axis=1) & - ak.any(matches_leg1, axis=1) - ) + # compute the isolation mask separately as it is used to defined (qcd) categories later on + iso_mask = events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.medium - # indices for sorting first by isolation, then by pt - # for this, combine iso and pt values, e.g. iso 255 and pt 32.3 -> 2550032.3 - f = 10**(np.ceil(np.log10(ak.max(events.Tau.pt))) + 1) - sort_key = events.Tau[get_tau_tagger("jet")] * f + events.Tau.pt - sorted_indices = ak.argsort(sort_key, axis=-1, ascending=False) + return base_mask, iso_mask - # convert to sorted indices - base_indices = sorted_indices[base_mask[sorted_indices]] - base_indices = ak.values_astype(base_indices, np.int32) - # additional mask to select final, Medium isolated taus - iso_mask = events.Tau[base_indices][get_tau_tagger("jet")] >= wp_config.tau_vs_jet.medium +@selector( + uses={"{Tau,TrigObj}.{pt,eta,phi}"}, + # shifts are declared dynamically below in tau_selection_init + exposed=False, +) +def tau_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Tau trigger matching. 
+ """ + # TODO: only perform this for events where the trigger fired, needs more info and flat_views + if ak.all(ak.num(events.Tau) == 0): + logger.info("no taus found in event chunk") + return ak.full_like(events.Tau.pt, False, dtype=bool) - return base_indices, iso_mask + is_cross_e = trigger.has_tag("cross_e_tau") + is_cross_mu = trigger.has_tag("cross_mu_tau") + is_cross_tau = trigger.has_tag("cross_tau_tau") + is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") + is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") + is_any_cross_tau = is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet + assert is_cross_e or is_cross_mu or is_any_cross_tau + + # start per-tau mask with trigger object matching per leg + if is_cross_e or is_cross_mu: + # catch config errors + assert trigger.n_legs == len(leg_masks) == 2 + assert abs(trigger.legs["tau"].pdg_id) == 15 + # match leg 1 + return trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau"]]) + + # is_any_cross_tau + # catch config errors + assert trigger.n_legs == len(leg_masks) >= 2 + assert abs(trigger.legs["tau1"].pdg_id) == 15 + assert abs(trigger.legs["tau2"].pdg_id) == 15 + + # match both legs + matches_leg0 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau1"]]) + matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau2"]]) + # taus need to be matched to at least one leg, but as a side condition + # each leg has to have at least one match to a tau + matches = ( + (matches_leg0 | matches_leg1) & + ak.any(matches_leg0, axis=1) & + ak.any(matches_leg1, axis=1) + ) + + return matches @tau_selection.init @@ -448,11 +382,13 @@ def tau_selection_init(self: Selector) -> None: @selector( uses={ - electron_selection, muon_selection, tau_selection, + electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, + tau_selection, tau_trigger_matching, "event", "{Electron,Muon,Tau}.{charge,mass}", }, produces={ - electron_selection, muon_selection, tau_selection, + electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, + tau_selection, tau_trigger_matching, # new columns "channel_id", "leptons_os", "tau2_isolated", "single_triggered", "cross_triggered", }, @@ -470,6 +406,7 @@ def lepton_selection( ch_etau = self.config_inst.get_channel("etau") ch_mutau = self.config_inst.get_channel("mutau") ch_tautau = self.config_inst.get_channel("tautau") + ch_ee = self.config_inst.get_channel("ee") ch_mumu = self.config_inst.get_channel("mumu") ch_emu = self.config_inst.get_channel("emu") @@ -480,12 +417,17 @@ def lepton_selection( leptons_os = false_mask single_triggered = false_mask cross_triggered = false_mask - empty_indices = events.Tau[:, :0].charge * 1 # ak.zeros_like(1 * events.event, dtype=np.uint16)[..., None][..., :0] - sel_electron_indices = empty_indices - sel_muon_indices = empty_indices - sel_tau_indices = empty_indices + sel_electron_mask = ak.full_like(events.Electron.pt, False, dtype=bool) + sel_muon_mask = ak.full_like(events.Muon.pt, False, dtype=bool) + sel_tau_mask = ak.full_like(events.Tau.pt, False, dtype=bool) leading_taus = events.Tau[:, :0] + # indices for sorting taus first by isolation, then by pt + # for this, combine iso and pt values, e.g. 
iso 255 and pt 32.3 -> 2550032.3 + f = 10**(np.ceil(np.log10(ak.max(events.Tau.pt))) + 2) + tau_sorting_key = events.Tau[f"raw{self.config_inst.x.tau_tagger}VSjet"] * f + events.Tau.pt + tau_sorting_indices = ak.argsort(tau_sorting_key, axis=-1, ascending=False) + # perform each lepton election step separately per trigger, avoid caching sel_kwargs = {**kwargs, "call_force": True} for trigger, trigger_fired, leg_masks in trigger_results.x.trigger_data: @@ -493,242 +435,350 @@ def lepton_selection( is_cross = trigger.has_tag("cross_trigger") # electron selection - electron_indices, electron_veto_indices = self[electron_selection]( + electron_mask, electron_veto_mask = self[electron_selection]( events, trigger, - leg_masks, **sel_kwargs, ) # muon selection - muon_indices, muon_veto_indices = self[muon_selection]( + muon_mask, muon_veto_mask = self[muon_selection]( events, trigger, - leg_masks, **sel_kwargs, ) # tau selection - tau_indices, tau_iso_mask = self[tau_selection]( + tau_mask, tau_iso_mask = self[tau_selection]( events, trigger, - leg_masks, - electron_indices, - muon_indices, + electron_mask, + muon_mask, **sel_kwargs, ) - # lepton pair selecton per trigger via lepton counting - + # conditions potentially leading to etau channel if trigger.has_tag({"single_e", "cross_e_tau"}) and ( self.dataset_inst.is_mc or self.dataset_inst.has_tag("etau") ): + # fold trigger matching into the selection + trig_electron_mask = ( + electron_mask & + self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + trig_tau_mask = tau_mask + if trigger.has_tag("cross_e_tau"): + trig_tau_mask = ( + trig_tau_mask & + self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + + # check if the most isolated tau among the selected ones is matched + first_tau_matched = ak.fill_none( + ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1), + False, + ) + # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau + # without and with trigger matching on the default objects is_etau = ( trigger_fired & - (ak.num(electron_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 1) + (ak.sum(electron_mask, axis=1) == 1) & + (ak.sum(trig_electron_mask, axis=1) == 1) & + (ak.sum(electron_veto_mask, axis=1) == 1) & + (ak.sum(muon_veto_mask, axis=1) == 0) & + first_tau_matched ) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 - # determine the os/ss charge sign relation - e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) - tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + + # get selected taus and sort them + # (this will be correct for events for which is_etau is actually True) + sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] + # determine the relative charge and tau2 isolation + e_charge = ak.firsts(events.Electron[trig_electron_mask].charge, axis=1) + tau_charge = ak.firsts(sorted_sel_taus.charge, axis=1) is_os = e_charge == -tau_charge + is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 1 # store global variables channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) leptons_os = ak.where(is_etau, is_os, leptons_os) single_triggered = ak.where(is_etau & is_single, True, single_triggered) cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) - sel_electron_indices 
= ak.where(is_etau, electron_indices, sel_electron_indices)
-            sel_tau_indices = ak.where(is_etau, tau_indices, sel_tau_indices)
-            leading_taus = ak.where(is_etau, events.Tau[tau_indices[:, :1]], leading_taus)
+            sel_electron_mask = ak.where(is_etau, trig_electron_mask, sel_electron_mask)
+            sel_tau_mask = ak.where(is_etau, trig_tau_mask, sel_tau_mask)
+            leading_taus = ak.where(is_etau, sorted_sel_taus[:, :1], leading_taus)
 
-        elif trigger.has_tag({"single_mu", "cross_mu_tau"}) and (
+        # mutau channel
+        if trigger.has_tag({"single_mu", "cross_mu_tau"}) and (
             self.dataset_inst.is_mc or
             self.dataset_inst.has_tag("mutau")
         ):
+            # fold trigger matching into the selection
+            trig_muon_mask = (
+                muon_mask &
+                self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs)
+            )
+            trig_tau_mask = tau_mask
+            if trigger.has_tag("cross_mu_tau"):
+                trig_tau_mask = (
+                    trig_tau_mask &
+                    self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs)
+                )
+
+            # check if the most isolated tau among the selected ones is matched
+            first_tau_matched = ak.fill_none(
+                ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1),
+                False,
+            )
+
             # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau
+            # without and with trigger matching on the default objects
             is_mutau = (
                 trigger_fired &
-                (ak.num(muon_indices, axis=1) == 1) &
-                (ak.num(muon_veto_indices, axis=1) == 1) &
-                (ak.num(electron_veto_indices, axis=1) == 0) &
-                (ak.num(tau_indices, axis=1) >= 1)
+                (ak.sum(muon_mask, axis=1) == 1) &
+                (ak.sum(trig_muon_mask, axis=1) == 1) &
+                (ak.sum(muon_veto_mask, axis=1) == 1) &
+                (ak.sum(electron_veto_mask, axis=1) == 0) &
+                first_tau_matched
             )
-            is_iso = ak.sum(tau_iso_mask, axis=1) >= 1
-            # determine the os/ss charge sign relation
-            mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1)
-            tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1)
+
+            # get selected, sorted taus to obtain quantities
+            # (this will be correct for events for which is_mutau is actually True)
+            sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]]
+            # determine the relative charge and tau2 isolation
+            mu_charge = ak.firsts(events.Muon[trig_muon_mask].charge, axis=1)
+            tau_charge = ak.firsts(sorted_sel_taus.charge, axis=1)
             is_os = mu_charge == -tau_charge
+            is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 1
 
             # store global variables
             channel_id = update_channel_ids(events, channel_id, ch_mutau.id, is_mutau)
             tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated)
             leptons_os = ak.where(is_mutau, is_os, leptons_os)
             single_triggered = ak.where(is_mutau & is_single, True, single_triggered)
             cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered)
-            sel_muon_indices = ak.where(is_mutau, muon_indices, sel_muon_indices)
-            sel_tau_indices = ak.where(is_mutau, tau_indices, sel_tau_indices)
-            leading_taus = ak.where(is_mutau, events.Tau[tau_indices[:, :1]], leading_taus)
+            sel_muon_mask = ak.where(is_mutau, trig_muon_mask, sel_muon_mask)
+            sel_tau_mask = ak.where(is_mutau, trig_tau_mask, sel_tau_mask)
+            leading_taus = ak.where(is_mutau, sorted_sel_taus[:, :1], leading_taus)
 
-        elif trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and (
-            self.dataset_inst.is_mc or
-            self.dataset_inst.has_tag("tautau")
+        # tautau channel
+        if (
+            trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and
+            (self.dataset_inst.is_mc or self.dataset_inst.has_tag("tautau"))
         ):
+            # fold trigger
matching into the selection + trig_tau_mask = ( + tau_mask & + self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + + # check if the two leading (most isolated) taus are matched + leading_taus_matched = ak.fill_none( + ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1) & + ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]][:, 1:], axis=1), + False, + ) + # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated is_tautau = ( trigger_fired & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 2) & - (ak.sum(tau_iso_mask, axis=1) >= 1) + (ak.sum(electron_veto_mask, axis=1) == 0) & + (ak.sum(muon_veto_mask, axis=1) == 0) & + leading_taus_matched ) + + # TODO: not clear if this is required in the first place, discuss with cclub # special case for cross tau vbf trigger: # to avoid overlap, with non-vbf triggers, only one tau is allowed to have pt > 40 - if trigger.has_tag("cross_tau_tau_vbf"): - is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 2 - # tau_indices are sorted by highest isolation as cond. 1 and highest pt as cond. 2, so - # the first two indices are exactly those selected by the full-blown pairing algorithm - # and there is no need here to apply it again :) - # determine the os/ss charge sign relation - tau1_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - tau2_charge = ak.firsts(events.Tau[tau_indices].charge[..., 1:], axis=1) + # if trigger.has_tag("cross_tau_tau_vbf"): + # is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) + + # get selected, sorted taus to obtain quantities + # (this will be correct for events for which is_tautau is actually True) + sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] + # determine the relative charge and tau2 isolation + tau1_charge = ak.firsts(sorted_sel_taus.charge, axis=1) + tau2_charge = ak.firsts(sorted_sel_taus.charge[:, 1:], axis=1) is_os = tau1_charge == -tau2_charge + is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 2 # store global variables channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) leptons_os = ak.where(is_tautau, is_os, leptons_os) single_triggered = ak.where(is_tautau & is_single, True, single_triggered) cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) - sel_tau_indices = ak.where(is_tautau, tau_indices, sel_tau_indices) - leading_taus = ak.where(is_tautau, events.Tau[tau_indices[:, :2]], leading_taus) + sel_tau_mask = ak.where(is_tautau, trig_tau_mask, sel_tau_mask) + leading_taus = ak.where(is_tautau, sorted_sel_taus[:, :2], leading_taus) - # control regions - if trigger.has_tag({"single_mu"}) and ( + # ee channel + if trigger.has_tag("single_e") and ( + self.dataset_inst.is_mc or + self.dataset_inst.has_tag("ee") + ): + # fold trigger matching into the selection + trig_electron_mask = ( + electron_mask & + self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + ) + + # check if the first (hardest) electron matched + electron_sorting_indices = ak.argsort(events.Electron.pt, axis=1, ascending=False) + leading_electron_matched = ak.fill_none( + ak.firsts(trig_electron_mask[electron_sorting_indices[electron_mask[electron_sorting_indices]]], axis=1), # noqa: E501 + 
False, + ) + + # expect 2 electrons, 2 veto electrons, 0 veto muons, and ignore the taus + is_ee = ( + trigger_fired & + (ak.sum(electron_mask, axis=1) == 2) & + leading_electron_matched & + (ak.sum(electron_veto_mask, axis=1) == 2) & + (ak.sum(muon_veto_mask, axis=1) == 0) + ) + + # get selected, sorted electrons to obtain quantities + # (this will be correct for events for which is_ee is actually True) + sorted_sel_electrons = events.Electron[electron_sorting_indices][electron_mask[electron_sorting_indices]] + # determine the relative charge + e1_charge = ak.firsts(sorted_sel_electrons.charge, axis=1) + e2_charge = ak.firsts(sorted_sel_electrons.charge[:, 1:], axis=1) + is_os = e1_charge == -e2_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_ee.id, is_ee) + leptons_os = ak.where(is_ee, is_os, leptons_os) + single_triggered = ak.where(is_ee & is_single, True, single_triggered) + cross_triggered = ak.where(is_ee & is_cross, True, cross_triggered) + sel_electron_mask = ak.where(is_ee, electron_mask, sel_electron_mask) + + # mumu channel + if trigger.has_tag("single_mu") and ( self.dataset_inst.is_mc or self.dataset_inst.has_tag("mumu") ): - first_muon_indices, second_muon_indices, muon_veto_indices = self[muon_selection]( - events, - trigger, - leg_masks, - mumu_selection=True, - **sel_kwargs, + # fold trigger matching into the selection + trig_muon_mask = ( + muon_mask & + self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) ) - mumu_muon_indices = ak.concatenate([first_muon_indices, second_muon_indices], axis=1) + # check if the first (hardest) muon matched + muon_sorting_indices = ak.argsort(events.Muon.pt, axis=1, ascending=False) + leading_muon_matched = ak.fill_none( + ak.firsts(trig_muon_mask[muon_sorting_indices[muon_mask[muon_sorting_indices]]], axis=1), + False, + ) # expect 2 muons, 2 veto muons, 0 veto electrons, and ignore the taus is_mumu = ( trigger_fired & - (ak.num(first_muon_indices, axis=1) == 1) & - (ak.num(second_muon_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 2) & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 0) # to remove? 
+ (ak.sum(muon_mask, axis=1) == 2) & + leading_muon_matched & + (ak.sum(muon_veto_mask, axis=1) == 2) & + (ak.sum(electron_veto_mask, axis=1) == 0) ) - # store necessary global variables - channel_id = update_channel_ids(events, channel_id, ch_mumu.id, is_mumu) - sel_muon_indices = ak.where(is_mumu, mumu_muon_indices, sel_muon_indices) - single_triggered = ak.where(is_mumu & is_single, True, single_triggered) - cross_triggered = ak.where(is_mumu & is_cross, True, cross_triggered) - # define fake iso regions for mumu, as there is not necessarily a tau to be isolated - # should be always false, as no tau are used - is_iso = ak.sum(tau_iso_mask, axis=1) < 0 - # determine the os/ss charge sign relation - mu1_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) - mu2_charge = ak.firsts(events.Muon[muon_indices].charge[..., 1:], axis=1) + # get selected, sorted muons to obtain quantities + # (this will be correct for events for which is_mumu is actually True) + sorted_sel_muons = events.Muon[muon_sorting_indices][muon_mask[muon_sorting_indices]] + # determine the relative charge + mu1_charge = ak.firsts(sorted_sel_muons.charge, axis=1) + mu2_charge = ak.firsts(sorted_sel_muons.charge[:, 1:], axis=1) is_os = mu1_charge == -mu2_charge # store global variables - tau2_isolated = ak.where(is_mumu, is_iso, tau2_isolated) + channel_id = update_channel_ids(events, channel_id, ch_mumu.id, is_mumu) leptons_os = ak.where(is_mumu, is_os, leptons_os) + single_triggered = ak.where(is_mumu & is_single, True, single_triggered) + cross_triggered = ak.where(is_mumu & is_cross, True, cross_triggered) + sel_muon_mask = ak.where(is_mumu, muon_mask, sel_muon_mask) # emu channel if ( - (trigger.has_tag({"single_e"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e"))) or - (trigger.has_tag({"single_mu"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu"))) + (emu_from_e := ( + trigger.has_tag("single_e") and + (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e")) + )) or ( + trigger.has_tag("single_mu") and + (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu")) + ) ): - # behavior for Single Muon dataset - if trigger.has_tag({"single_mu"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu")): - for trigger_emu, trigger_fired_emu, leg_masks_emu in trigger_results.x.trigger_data: - # verify that the single electron trigger is matched if the single electron trigger is fired - # if not, the matching is not verified in the selection. - # This is done by giving the electron selection the trigger_fired_mask - - # TODO: handle the case where there are several single e triggers (maybe not necessary?) - # as of now, only the last single electron trigger in the list of triggers applied to the dataset - # is used - if trigger_emu.has_tag("single_e"): - electron_indices, electron_veto_indices = self[electron_selection]( - events, - trigger_emu, - # TODO: this was leg_masks before, why? 
- leg_masks_emu, - trigger_fire_list=trigger_fired_emu, - **sel_kwargs, - ) - not_muon_in_e_trigger_fired = True - - # behavior for Single Electron dataset - elif trigger.has_tag({"single_e"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e")): - muon_indices, muon_veto_indices = self[muon_selection]( - events, - trigger, - leg_masks, - select_without_trigger=True, - **sel_kwargs, + if emu_from_e: + emu_electron_mask = electron_mask + # fold trigger matching into the selection + trig_electron_mask = ( + electron_mask & + self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) ) - muon_triggers_fire_list = [] - for trigger_emu, trigger_fired_emu, leg_masks_emu in trigger_results.x.trigger_data: - if trigger_emu.has_tag("single_mu"): - muon_triggers_fire_list += [trigger_fired_emu] - muon_trigger_fired = reduce( - or_, - muon_triggers_fire_list, + # for muons, loop over triggers, find single triggers and make sure none of them + # fired in order to avoid double counting + emu_muon_mask = False + mu_trig_fired = ak.full_like(events.event, False, dtype=bool) + for _trigger, _trigger_fired, _ in trigger_results.x.trigger_data: + if not _trigger.has_tag("single_mu"): + continue + # evaluate the muon selection once (it is the same for all single triggers) + if emu_muon_mask is False: + emu_muon_mask, _ = self[muon_selection](events, _trigger, **sel_kwargs) + # store the trigger decision + mu_trig_fired = mu_trig_fired | _trigger_fired + # muons obey the trigger rules if no single trigger fired + trig_muon_mask = emu_muon_mask & ~mu_trig_fired + + else: + emu_muon_mask = muon_mask + # fold trigger matching into the selection + trig_muon_mask = ( + muon_mask & + self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) ) - not_muon_in_e_trigger_fired = ~muon_trigger_fired + # for electrons, loop over triggers, find single triggers and check the matching + # only in case a trigger fired + emu_electron_mask = False + e_trig_fired = ak.full_like(events.event, False, dtype=bool) + e_match_mask = ak.full_like(events.Electron.pt, False, dtype=bool) + for _trigger, _trigger_fired, _leg_masks in trigger_results.x.trigger_data: + if not _trigger.has_tag("single_e"): + continue + # evaluate the electron selection once (it is the same for all single triggers) + if emu_electron_mask is False: + emu_electron_mask, _ = self[electron_selection](events, _trigger, **sel_kwargs) + # store the trigger decision + e_trig_fired = e_trig_fired | _trigger_fired + # evaluate the matching + e_match_mask = e_match_mask | ( + self[electron_trigger_matching](events, _trigger, _leg_masks, **sel_kwargs) & + _trigger_fired + ) + # for events in which no single e trigger fired, consider the matching as successful + e_match_mask = e_match_mask | ~e_trig_fired + trig_electron_mask = emu_electron_mask & e_match_mask - # general emu channel selection # expect 1 electron, 1 muon, 1 veto electron, 1 veto muon, and ignore taus is_emu = ( - trigger_fired & not_muon_in_e_trigger_fired & - (ak.num(electron_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 1) & - (ak.num(muon_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 1) & - (ak.num(tau_indices, axis=1) >= 0) # to remove? 
+ trigger_fired & + (ak.sum(emu_electron_mask, axis=1) == 1) & + (ak.sum(trig_electron_mask, axis=1) == 1) & + (ak.sum(electron_veto_mask, axis=1) == 1) & + (ak.sum(emu_muon_mask, axis=1) == 1) & + (ak.sum(trig_muon_mask, axis=1) == 1) & + (ak.sum(muon_veto_mask, axis=1) == 1) ) - # store necessary global variables + # determine the relative charge + e_charge = ak.firsts(events.Electron[trig_electron_mask].charge, axis=1) + mu_charge = ak.firsts(events.Muon[trig_muon_mask].charge, axis=1) + is_os = e_charge == -mu_charge + # store global variables channel_id = update_channel_ids(events, channel_id, ch_emu.id, is_emu) - sel_electron_indices = ak.where(is_emu, electron_indices, sel_electron_indices) - sel_muon_indices = ak.where(is_emu, muon_indices, sel_muon_indices) + leptons_os = ak.where(is_emu, is_os, leptons_os) single_triggered = ak.where(is_emu & is_single, True, single_triggered) cross_triggered = ak.where(is_emu & is_cross, True, cross_triggered) - - # define fake iso regions for emu, as there is not necessarily a tau to be isolated - # should be always false, as no tau are used - is_iso = ak.sum(tau_iso_mask, axis=1) < 0 - # determine the os/ss charge sign relation - mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) - e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) - is_os = mu_charge == -e_charge - # store global variables - tau2_isolated = ak.where(is_emu, is_iso, tau2_isolated) - leptons_os = ak.where(is_emu, is_os, leptons_os) - # print("number events in emu channel", ak.sum(is_emu)) + sel_electron_mask = ak.where(is_emu, trig_electron_mask, sel_electron_mask) + sel_muon_mask = ak.where(is_emu, trig_muon_mask, sel_muon_mask) # some final type conversions channel_id = ak.values_astype(channel_id, np.uint8) leptons_os = ak.fill_none(leptons_os, False) - sel_electron_indices = ak.values_astype(sel_electron_indices, np.int32) - sel_muon_indices = ak.values_astype(sel_muon_indices, np.int32) - sel_tau_indices = ak.values_astype(sel_tau_indices, np.int32) # save new columns events = set_ak_column(events, "channel_id", channel_id) @@ -737,6 +787,11 @@ def lepton_selection( events = set_ak_column(events, "single_triggered", single_triggered) events = set_ak_column(events, "cross_triggered", cross_triggered) + # convert lepton masks to sorted indices (pt for e/mu, iso for tau) + sel_electron_indices = sorted_indices_from_mask(sel_electron_mask, events.Electron.pt, ascending=False) + sel_muon_indices = sorted_indices_from_mask(sel_muon_mask, events.Muon.pt, ascending=False) + sel_tau_indices = sorted_indices_from_mask(sel_tau_mask, tau_sorting_key, ascending=False) + return events, SelectionResult( steps={ "lepton": channel_id != 0, @@ -769,3 +824,9 @@ def lepton_selection( "leading_taus": leading_taus, }, ) + + +@lepton_selection.init +def lepton_selection_init(self: Selector) -> None: + # add column to load the raw tau tagger score + self.uses.add(f"Tau.raw{self.config_inst.x.tau_tagger}VSjet") From 73d231ca990af7d507dded2323eee7f2bb6cd920 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 8 Jan 2025 19:53:14 +0100 Subject: [PATCH 04/20] Improve trigger leg matching. 
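Matching is now computed only for events in which the given trigger actually fired, and the
per-object decisions are scattered back into a mask covering all events, with objects in
unselected events remaining False. A minimal sketch of the scatter-back pattern, using plain
awkward/numpy in place of the full_like/flat_np_view helpers and made-up values:

    import awkward as ak
    import numpy as np

    pt = ak.Array([[10.0, 20.0], [], [30.0, 40.0]])  # per-object values
    event_mask = ak.Array([True, False, False])      # per-event trigger decision

    # run the per-object computation only for selected events
    partial = pt[event_mask] > 15.0                  # [[False, True]]

    # scatter the partial result back into a flat, full-length object mask
    flat_full = np.zeros(int(ak.sum(ak.num(pt))), dtype=bool)
    in_sel_event = ak.to_numpy(ak.flatten(ak.broadcast_arrays(event_mask, pt)[0]))
    flat_full[in_sel_event] = ak.to_numpy(ak.flatten(partial))
    result = ak.unflatten(flat_full, ak.num(pt))     # [[False, True], [], [False, False]]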
--- hbt/selection/jet.py | 9 ++-- hbt/selection/lepton.py | 98 ++++++++++++++++++++++++++++------------- modules/columnflow | 2 +- 3 files changed, 74 insertions(+), 35 deletions(-) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index a0afcca8..18f09161 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -11,6 +11,7 @@ from columnflow.selection.cms.jets import jet_veto_map from columnflow.columnar_util import ( EMPTY_FLOAT, set_ak_column, sorted_indices_from_mask, mask_from_indices, flat_np_view, + full_like, ) from columnflow.util import maybe_import, InsertableDict @@ -185,7 +186,7 @@ def jet_selection( # should be done and should therefore be ignored. # create a mask to select tautau events that were only triggered by a tau-tau-jet cross trigger - false_mask = ak.full_like(events.event, False, dtype=bool) + false_mask = full_like(events.event, False, dtype=bool) ttj_mask = ( (events.channel_id == 3) & ~ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttc], false_mask), axis=1) & @@ -195,7 +196,7 @@ def jet_selection( # only perform this special treatment when applicable if ak.any(ttj_mask): # check which jets can be matched to any of the jet legs - matching_mask = ak.full_like(events.Jet.pt[ttj_mask], False, dtype=bool) + matching_mask = full_like(events.Jet.pt[ttj_mask], False, dtype=bool) for trigger, _, leg_masks in trigger_results.x.trigger_data: if trigger.id in self.trigger_ids_ttjc: trig_objs = events.TrigObj[leg_masks["jet"]] @@ -246,7 +247,7 @@ def jet_selection( # insert back into the full hhbjet_mask flat_hhbjet_mask = flat_np_view(hhbjet_mask) - flat_jet_mask = ak.flatten(ak.full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) + flat_jet_mask = ak.flatten(full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) # validate that either none or two hhbjets were identified @@ -306,7 +307,7 @@ def jet_selection( # extra requirements for events for which only the tau tau vbf cross trigger fired cross_vbf_ids = [t.id for t in self.config_inst.x.triggers if t.has_tag("cross_tau_tau_vbf")] if not cross_vbf_ids: - cross_vbf_mask = ak.full_like(1 * events.event, False, dtype=bool) + cross_vbf_mask = full_like(1 * events.event, False, dtype=bool) else: cross_vbf_masks = [events.trigger_ids == tid for tid in cross_vbf_ids] # This combines "at least one cross trigger is fired" and "no other triggers are fired" diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index 6c04ec7b..899db374 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -12,7 +12,9 @@ from functools import reduce from columnflow.selection import Selector, SelectionResult, selector -from columnflow.columnar_util import set_ak_column, sorted_indices_from_mask +from columnflow.columnar_util import ( + set_ak_column, sorted_indices_from_mask, flat_np_view, full_like, +) from columnflow.util import maybe_import from hbt.util import IF_NANO_V9, IF_NANO_GE_V10 @@ -28,21 +30,36 @@ def trigger_object_matching( vectors1: ak.Array, vectors2: ak.Array, + /, + *, threshold: float = 0.5, axis: int = 2, + event_mask: ak.Array | type(Ellipsis) | None = None, ) -> ak.Array: """ Helper to check per object in *vectors1* if there is at least one object in *vectors2* that leads to a delta R metric below *threshold*. The final reduction is applied over *axis* of the - resulting metric table containing the full combinatorics. 
When *return_all_matches* is *True*, - the matrix with all matching decisions is returned as well. + resulting metric table containing the full combinatorics. If an *event_mask* is given, the + the matching is performed only for those events, but a full object mask with the same shape as + that of *vectors1* is returned, which all objects set to *False* where not matching was done. """ + # handle event masks + used_event_mask = event_mask is not None and event_mask is not Ellipsis + event_mask = Ellipsis if event_mask is None else event_mask + # delta_r for all combinations - dr = vectors1.metric_table(vectors2) + dr = vectors1[event_mask].metric_table(vectors2[event_mask]) # check per element in vectors1 if there is at least one matching element in vectors2 any_match = ak.any(dr < threshold, axis=axis) + # expand to original shape if an event mask was given + if used_event_mask: + full_any_match = full_like(vectors1.pt, False, dtype=bool) + flat_full_any_match = flat_np_view(full_any_match) + flat_full_any_match[flat_np_view(full_any_match | event_mask)] = flat_np_view(any_match) + any_match = full_any_match + return any_match @@ -134,13 +151,13 @@ def electron_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, + trigger_fired: ak.Array, leg_masks: dict[str, ak.Array], **kwargs, ) -> tuple[ak.Array]: """ Electron trigger matching. """ - # TODO: only perform this for events where the trigger fired, needs more info and flat_views is_single = trigger.has_tag("single_e") is_cross = trigger.has_tag("cross_e_tau") @@ -149,7 +166,11 @@ def electron_trigger_matching( assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) assert abs(trigger.legs["e"].pdg_id) == 11 - return trigger_object_matching(events.Electron, events.TrigObj[leg_masks["e"]]) + return trigger_object_matching( + events.Electron, + events.TrigObj[leg_masks["e"]], + event_mask=trigger_fired, + ) @selector( @@ -211,13 +232,13 @@ def muon_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, + trigger_fired: ak.Array, leg_masks: dict[str, ak.Array], **kwargs, ) -> tuple[ak.Array]: """ Muon trigger matching. """ - # TODO: only perform this for events where the trigger fired, needs more info and flat_views is_single = trigger.has_tag("single_mu") is_cross = trigger.has_tag("cross_mu_tau") @@ -226,7 +247,11 @@ def muon_trigger_matching( assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) assert abs(trigger.legs["mu"].pdg_id) == 13 - return trigger_object_matching(events.Muon, events.TrigObj[leg_masks["mu"]]) + return trigger_object_matching( + events.Muon, + events.TrigObj[leg_masks["mu"]], + event_mask=trigger_fired, + ) @selector( @@ -253,7 +278,7 @@ def tau_selection( # return empty mask if no tagged taus exists in the chunk if ak.all(ak.num(events.Tau) == 0): logger.info("no taus found in event chunk") - false_mask = ak.full_like(events.Tau.pt, False, dtype=bool) + false_mask = full_like(events.Tau.pt, False, dtype=bool) return false_mask, false_mask is_single_e = trigger.has_tag("single_e") @@ -317,16 +342,16 @@ def tau_trigger_matching( self: Selector, events: ak.Array, trigger: Trigger, + trigger_fired: ak.Array, leg_masks: dict[str, ak.Array], **kwargs, ) -> tuple[ak.Array]: """ Tau trigger matching. 
""" - # TODO: only perform this for events where the trigger fired, needs more info and flat_views if ak.all(ak.num(events.Tau) == 0): logger.info("no taus found in event chunk") - return ak.full_like(events.Tau.pt, False, dtype=bool) + return full_like(events.Tau.pt, False, dtype=bool) is_cross_e = trigger.has_tag("cross_e_tau") is_cross_mu = trigger.has_tag("cross_mu_tau") @@ -342,7 +367,11 @@ def tau_trigger_matching( assert trigger.n_legs == len(leg_masks) == 2 assert abs(trigger.legs["tau"].pdg_id) == 15 # match leg 1 - return trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau"]]) + return trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau"]], + event_mask=trigger_fired, + ) # is_any_cross_tau # catch config errors @@ -351,8 +380,17 @@ def tau_trigger_matching( assert abs(trigger.legs["tau2"].pdg_id) == 15 # match both legs - matches_leg0 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau1"]]) - matches_leg1 = trigger_object_matching(events.Tau, events.TrigObj[leg_masks["tau2"]]) + matches_leg0 = trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau1"]], + event_mask=trigger_fired, + ) + matches_leg1 = trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau2"]], + event_mask=trigger_fired, + ) + # taus need to be matched to at least one leg, but as a side condition # each leg has to have at least one match to a tau matches = ( @@ -417,9 +455,9 @@ def lepton_selection( leptons_os = false_mask single_triggered = false_mask cross_triggered = false_mask - sel_electron_mask = ak.full_like(events.Electron.pt, False, dtype=bool) - sel_muon_mask = ak.full_like(events.Muon.pt, False, dtype=bool) - sel_tau_mask = ak.full_like(events.Tau.pt, False, dtype=bool) + sel_electron_mask = full_like(events.Electron.pt, False, dtype=bool) + sel_muon_mask = full_like(events.Muon.pt, False, dtype=bool) + sel_tau_mask = full_like(events.Tau.pt, False, dtype=bool) leading_taus = events.Tau[:, :0] # indices for sorting taus first by isolation, then by pt @@ -465,13 +503,13 @@ def lepton_selection( # fold trigger matching into the selection trig_electron_mask = ( electron_mask & - self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) trig_tau_mask = tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & - self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the most isolated tau among the selected ones is matched @@ -517,13 +555,13 @@ def lepton_selection( # fold trigger matching into the selection trig_muon_mask = ( muon_mask & - self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) trig_tau_mask = tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & - self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the most isolated tau among the selected ones is matched @@ -569,7 +607,7 @@ def lepton_selection( # fold trigger matching into the selection trig_tau_mask = ( tau_mask & - self[tau_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if 
the two leading (most isolated) taus are matched @@ -618,7 +656,7 @@ def lepton_selection( # fold trigger matching into the selection trig_electron_mask = ( electron_mask & - self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the first (hardest) electron matched @@ -659,7 +697,7 @@ def lepton_selection( # fold trigger matching into the selection trig_muon_mask = ( muon_mask & - self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the first (hardest) muon matched @@ -707,12 +745,12 @@ def lepton_selection( # fold trigger matching into the selection trig_electron_mask = ( electron_mask & - self[electron_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # for muons, loop over triggers, find single triggers and make sure none of them # fired in order to avoid double counting emu_muon_mask = False - mu_trig_fired = ak.full_like(events.event, False, dtype=bool) + mu_trig_fired = full_like(events.event, False, dtype=bool) for _trigger, _trigger_fired, _ in trigger_results.x.trigger_data: if not _trigger.has_tag("single_mu"): continue @@ -729,13 +767,13 @@ def lepton_selection( # fold trigger matching into the selection trig_muon_mask = ( muon_mask & - self[muon_trigger_matching](events, trigger, leg_masks, **sel_kwargs) + self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # for electrons, loop over triggers, find single triggers and check the matching # only in case a trigger fired emu_electron_mask = False - e_trig_fired = ak.full_like(events.event, False, dtype=bool) - e_match_mask = ak.full_like(events.Electron.pt, False, dtype=bool) + e_trig_fired = full_like(events.event, False, dtype=bool) + e_match_mask = full_like(events.Electron.pt, False, dtype=bool) for _trigger, _trigger_fired, _leg_masks in trigger_results.x.trigger_data: if not _trigger.has_tag("single_e"): continue @@ -746,7 +784,7 @@ def lepton_selection( e_trig_fired = e_trig_fired | _trigger_fired # evaluate the matching e_match_mask = e_match_mask | ( - self[electron_trigger_matching](events, _trigger, _leg_masks, **sel_kwargs) & + self[electron_trigger_matching](events, _trigger, _trigger_fired, _leg_masks, **sel_kwargs) & _trigger_fired ) # for events in which no single e trigger fired, consider the matching as successful diff --git a/modules/columnflow b/modules/columnflow index c58104f5..154e7b0b 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit c58104f593588a6c5393b9ed19c0604f431a7078 +Subproject commit 154e7b0bbf0ff2a1e2c3263923cf42c5f3e48da1 From 02c15879657e2dd743304357dfabb5063aece098 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 08:46:00 +0100 Subject: [PATCH 05/20] Update cf. --- modules/columnflow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/columnflow b/modules/columnflow index 154e7b0b..701b8b36 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 154e7b0bbf0ff2a1e2c3263923cf42c5f3e48da1 +Subproject commit 701b8b36b8b962c16c643397a7388a399dbebe13 From cd179d148069ea81df946aa4a6370dbc974f857b Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 11:20:07 +0100 Subject: [PATCH 06/20] Fix sync_mode. 
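In sync mode, the campaign only carries the handful of datasets used for the synchronization
exercise, so the dedicated sync entries are dropped from the dataset list, datasets unknown to
the campaign are skipped, and the process verification is bypassed. Condensed, the new guard
amounts to the following sketch (sync_mode, campaign and cfg are the surrounding config-factory
names):

    for dataset_name in dataset_names:
        # in sync mode, only datasets known to the reduced campaign survive
        if sync_mode and not campaign.has_dataset(dataset_name):
            continue
        dataset = cfg.add_dataset(campaign.get_dataset(dataset_name))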
---
 hbt/config/configs_hbt.py | 14 +++++++-------
 modules/cmsdb             |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
index fadd094b..06581b14 100644
--- a/hbt/config/configs_hbt.py
+++ b/hbt/config/configs_hbt.py
@@ -330,13 +330,12 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
         *if_era(year=2023, tag="postBPix", values=[
             f"data_{stream}_d{v}" for stream in ["mu", "e", "tau", "met"] for v in "12"
         ]),
-
-        # sync
-        *if_era(year=[2022, 2023], sync=True, values=[
-            "hh_ggf_hbb_htt_kl1_kt1_powheg",
-        ]),
     ]
     for dataset_name in dataset_names:
+        # skip when in sync mode and not existing
+        if sync_mode and not campaign.has_dataset(dataset_name):
+            continue
+
         # add the dataset
         dataset = cfg.add_dataset(campaign.get_dataset(dataset_name))
 
@@ -353,7 +352,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
             dataset.add_tag({"has_top", "single_top", "st"})
         if dataset.name.startswith("dy_"):
             dataset.add_tag("dy")
-        if re.match(r"^(ww|wz|zz)_.*pythia$", dataset.name):
+        if re.match(r"^(ww|wz|zz)_.*_pythia$", dataset.name):
             dataset.add_tag("no_lhe_weights")
         if dataset_name.startswith("hh_"):
             dataset.add_tag("signal")
@@ -388,7 +387,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
                 info.n_files = 1
 
     # verify that the root process of each dataset is part of any of the registered processes
-    verify_config_processes(cfg, warn=True)
+    if not sync_mode:
+        verify_config_processes(cfg, warn=True)
 
     ################################################################################################
     # task defaults and groups
diff --git a/modules/cmsdb b/modules/cmsdb
index 54953603..ccac9ae6 160000
--- a/modules/cmsdb
+++ b/modules/cmsdb
@@ -1 +1 @@
-Subproject commit 549536034fdd0b20efbddc1adf4403a3713161ab
+Subproject commit ccac9ae6e3d7f761e3dcb8bc7109712d797c33f6

From b52f7acbd6ad58ac84c4e127d8c241897985b4d2 Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Thu, 9 Jan 2025 16:52:45 +0100
Subject: [PATCH 07/20] Update cf.
--- hbt/selection/jet.py | 85 -------------------------------------------- modules/columnflow | 2 +- 2 files changed, 1 insertion(+), 86 deletions(-) diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index 18f09161..9478ff4e 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -86,91 +86,6 @@ def jet_selection( # hhb-jet identification # - # # get the hhbtag values per jet per event - # hhbtag_scores = self[hhbtag](events, default_mask, lepton_results.x.lepton_pair, **kwargs) - - # # get the score indices of each hhbtag value - # score_indices = ak.argsort(hhbtag_scores, axis=1, ascending=False) - - # # only consider tautau events for which the tau_tau_jet trigger fired and no other tau tau trigger - # trigger_mask = ( - # (events.channel_id == 3) & - # ~ak.any((events.trigger_ids == 505) | (events.trigger_ids == 603), axis=1) & - # ak.any(events.trigger_ids == 701, axis=1) - # ) - - # # score (and local index) of the hhbtag for each jet coming from events which passed the trigger mask - # sel_score_indices = score_indices[trigger_mask] - # sel_score_indices_li = ak.local_index(sel_score_indices, axis=1) - - # # ids of the objects which fired the jet leg of the cross_tau_tau_jet trigger - # for trigger, trigger_fired, leg_masks in trigger_results.x.trigger_data: - # if trigger.has_tag("cross_tau_tau_jet"): - # # Zip the trigger legs with their corresponding masks - # for leg, mask in zip(trigger.legs, leg_masks): - # if leg.pdg_id == 1: - # obj_ids = mask[trigger_mask] - - # # trigger objects corresponding to the above ids - # sel_trig_objs = events.TrigObj[trigger_mask][obj_ids] - - # # mask that checks wheather or not the selected hhbjets *matches* the trigger object with delta R < 0.5 - # matching_mask = trigger_object_matching(events[trigger_mask].Jet[sel_score_indices], sel_trig_objs) - - # # index of the highest scored hhbjet *matching* the trigger object - # sel_hhbjet_idx = ak.argmax(matching_mask, axis=1) - - # # create mask to remove all jets except the highest scored hhbjet *matching* the trigger object - # unselected_hhbjet_idx = sel_score_indices_li != sel_hhbjet_idx - - # # hhbtag score of the highest scored hhbjet *matching* the trigger object - # first_hhbjet_idx = sel_score_indices[~unselected_hhbjet_idx] - - # # select the highest scored hhbjet of the remaining *matched and unmatched* hhbjets - # remaining_hhbjets_score_indices = sel_score_indices[unselected_hhbjet_idx] - # second_hhbjet_idx = ak.singletons((ak.firsts(remaining_hhbjets_score_indices))) - - # # concatenate both selected hhbjet indices; 1st index is *matched*; 2nd can be *matched* or *unmatched* - # sel_hhbjets_score_indices = ak.concatenate([first_hhbjet_idx, second_hhbjet_idx], axis=1) - # # sort the selected score indices (now the *matched* hhbjet can be in either position) - # sorted_sel_hhbjets_score_indices = ak.sort(sel_hhbjets_score_indices, axis=1) - - # # when only 1 hhbjet was found fill the second index with None - # sel_hhbjets_score_indices = ak.pad_none(sel_hhbjets_score_indices, 2, axis=1) - # sorted_sel_hhbjets_score_indices = ak.pad_none(sorted_sel_hhbjets_score_indices, 2, axis=1) - - # # all event indices - # event_idx = ak.local_index(score_indices, axis=0) - # # indices of events passing the trigger mask - # sel_event_idx = event_idx[trigger_mask] - - # # store the selected hhbjet score indices in the corresponding event indices - # temp_mask = (event_idx[:, None] == sel_event_idx) - # pos_mask = ak.any(temp_mask, axis=1) - # match_index = ak.argmax(temp_mask, axis=1) 
- - # # initializing a None array - # none = ak.Array([[None, None]]) - # none_expanded = ak.broadcast_arrays(none, event_idx)[0] - - # # save selected hhbjet scores for the selected events and save [None, None] to the remaining events - # padded_hhbjet_indices = ak.where(pos_mask, sel_hhbjets_score_indices[match_index], none_expanded) - # hhbjet_mask = ((li == padded_hhbjet_indices[..., [0]]) | (li == padded_hhbjet_indices[..., [1]])) - # # -------------------------------------------------------------------------------------------- - - # # get indices for actual book keeping only for events with both lepton candidates and where at - # # least two jets pass the default mask (bjet candidates) - # valid_score_mask = ( - # default_mask & - # (ak.sum(default_mask, axis=1) >= 2) & - # (ak.num(lepton_results.x.lepton_pair, axis=1) == 2) - # ) - # hhbjet_indices = score_indices[valid_score_mask[score_indices]][..., :2] - - # - # compressed version, to be checked - # - # get the hhbtag values per jet per event hhbtag_scores = self[hhbtag](events, default_mask, lepton_results.x.lepton_pair, **kwargs) diff --git a/modules/columnflow b/modules/columnflow index 701b8b36..52207354 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 701b8b36b8b962c16c643397a7388a399dbebe13 +Subproject commit 522073541c68d16938daa1bd1b137664dfc6d332 From 9adc9368d7ca02fe3ed84d7f3b3cbbb326b0d2ac Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 17:02:34 +0100 Subject: [PATCH 08/20] Fix name in sync. --- hbt/tasks/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbt/tasks/sync.py b/hbt/tasks/sync.py index 26902842..dfe1b6c4 100644 --- a/hbt/tasks/sync.py +++ b/hbt/tasks/sync.py @@ -289,7 +289,7 @@ def select_leptons(events: ak.Array, common_fields: dict[str, int | float]) -> a "run": events.run, "lumi": events.luminosityBlock, # high-level events variables - "channel": events.channel_id, + "channel_id": events.channel_id, "os": events.leptons_os * 1, "iso": events.tau2_isolated * 1, "deterministic_seed": to_str(events.deterministic_seed), From a5913c9869919c1bb0b9030799685ffd2898872e Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 17:57:38 +0100 Subject: [PATCH 09/20] Make deeptau cuts vs e and mu channel-dependent. --- hbt/selection/lepton.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index 899db374..485e35c7 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -316,9 +316,8 @@ def tau_selection( (events.Tau.pt > min_pt) & (abs(events.Tau.dz) < 0.2) & reduce(or_, [events.Tau.decayMode == mode for mode in (0, 1, 10, 11)]) & - (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & - (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) & (events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.vvvloose) + # vs e and mu cuts are channel dependent and thus applied in the overall lepton selection ) # remove taus with too close spatial separation to previously selected leptons @@ -440,6 +439,9 @@ def lepton_selection( """ Combined lepton selection. 
""" + wp_config = self.config_inst.x.tau_id_working_points + get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" + # get channels from the config ch_etau = self.config_inst.get_channel("etau") ch_mutau = self.config_inst.get_channel("mutau") @@ -500,12 +502,19 @@ def lepton_selection( self.dataset_inst.is_mc or self.dataset_inst.has_tag("etau") ): + # channel dependent deeptau cuts vs e and mu + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + # fold trigger matching into the selection trig_electron_mask = ( electron_mask & self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) - trig_tau_mask = tau_mask + trig_tau_mask = ch_tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & @@ -514,7 +523,7 @@ def lepton_selection( # check if the most isolated tau among the selected ones is matched first_tau_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1), + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1), False, ) @@ -552,12 +561,19 @@ def lepton_selection( self.dataset_inst.is_mc or self.dataset_inst.has_tag("mutau") ): + # channel dependent deeptau cuts vs e and mu + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + # fold trigger matching into the selection trig_muon_mask = ( muon_mask & self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) - trig_tau_mask = tau_mask + trig_tau_mask = ch_tau_mask if trigger.has_tag("cross_e_tau"): trig_tau_mask = ( trig_tau_mask & @@ -566,7 +582,7 @@ def lepton_selection( # check if the most isolated tau among the selected ones is matched first_tau_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1), + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1), False, ) @@ -604,16 +620,23 @@ def lepton_selection( trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and (self.dataset_inst.is_mc or self.dataset_inst.has_tag("tautau")) ): + # channel dependent deeptau cuts vs e and mu + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.vloose) + ) + # fold trigger matching into the selection trig_tau_mask = ( - tau_mask & + ch_tau_mask & self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) ) # check if the two leading (most isolated) taus are matched leading_taus_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]], axis=1) & - ak.firsts(trig_tau_mask[tau_sorting_indices[tau_mask[tau_sorting_indices]]][:, 1:], axis=1), + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1) & + ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]][:, 1:], axis=1), False, ) From a7fb9bcdde915b57cc71cf9047a10ec5beb34064 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 18:06:48 +0100 Subject: [PATCH 10/20] Move jet_veto_map selection from jet to overall selection. 
--- hbt/selection/default.py | 14 ++++++++++---- hbt/selection/jet.py | 10 ++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hbt/selection/default.py b/hbt/selection/default.py index 8cd35719..b0957136 100644 --- a/hbt/selection/default.py +++ b/hbt/selection/default.py @@ -14,6 +14,7 @@ from columnflow.selection.stats import increment_stats from columnflow.selection.cms.json_filter import json_filter from columnflow.selection.cms.met_filters import met_filters +from columnflow.selection.cms.jets import jet_veto_map from columnflow.production.processes import process_ids from columnflow.production.cms.mc_weight import mc_weight from columnflow.production.cms.pileup import pu_weight @@ -53,10 +54,10 @@ def get_met_filters(self: Selector) -> Iterable[str]: @selector( uses={ - json_filter, hbt_met_filters, trigger_selection, lepton_selection, jet_selection, mc_weight, - pu_weight, btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), process_ids, cutflow_features, - increment_stats, attach_coffea_behavior, patch_ecalBadCalibFilter, - IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), + json_filter, hbt_met_filters, IF_RUN_3(jet_veto_map), trigger_selection, lepton_selection, + jet_selection, mc_weight, pu_weight, btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), + process_ids, cutflow_features, increment_stats, attach_coffea_behavior, + patch_ecalBadCalibFilter, IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), }, produces={ trigger_selection, lepton_selection, jet_selection, mc_weight, pu_weight, @@ -96,6 +97,11 @@ def default( ) results += met_filter_results + # jet veto map + if self.has_dep(jet_veto_map): + events, veto_result = self[jet_veto_map](events, **kwargs) + results += veto_result + # trigger selection events, trigger_results = self[trigger_selection](events, **kwargs) results += trigger_results diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index 9478ff4e..5d91c159 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -8,14 +8,13 @@ from functools import reduce from columnflow.selection import Selector, SelectionResult, selector -from columnflow.selection.cms.jets import jet_veto_map from columnflow.columnar_util import ( EMPTY_FLOAT, set_ak_column, sorted_indices_from_mask, mask_from_indices, flat_np_view, full_like, ) from columnflow.util import maybe_import, InsertableDict -from hbt.util import IF_RUN_2, IF_RUN_3 +from hbt.util import IF_RUN_2 from hbt.production.hhbtag import hhbtag from hbt.selection.lepton import trigger_object_matching @@ -25,7 +24,7 @@ @selector( uses={ - hhbtag, IF_RUN_3(jet_veto_map), + hhbtag, "trigger_ids", "TrigObj.{pt,eta,phi}", "Jet.{pt,eta,phi,mass,jetId}", IF_RUN_2("Jet.puId"), "FatJet.{pt,eta,phi,mass,msoftdrop,jetId,subJetIdx1,subJetIdx2}", @@ -314,11 +313,6 @@ def jet_selection( }, ) - # additional jet veto map, vetoing entire events - if self.has_dep(jet_veto_map): - events, veto_result = self[jet_veto_map](events, **kwargs) - result += veto_result - return events, result From 9f7254138b8d66cffa4dc355ea2216d2e874e09d Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Thu, 9 Jan 2025 18:07:35 +0100 Subject: [PATCH 11/20] Update cmsdb ref. 
--- modules/cmsdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cmsdb b/modules/cmsdb index ccac9ae6..f2b84f7c 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit ccac9ae6e3d7f761e3dcb8bc7109712d797c33f6 +Subproject commit f2b84f7cceaea02a3bc5270436369da6562ecd68 From b2a23e8079fdec8c1b85742aa7ec13306bfdc99c Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Fri, 10 Jan 2025 08:31:43 +0100 Subject: [PATCH 12/20] Clean variables and categories. --- hbt/config/categories.py | 16 +++++++++++++--- hbt/config/variables.py | 12 +++++------- hbt/production/weights.py | 6 ++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/hbt/config/categories.py b/hbt/config/categories.py index b86371f0..7f69a7bf 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -71,7 +71,12 @@ def kwargs_fn(categories, add_qcd_group=True): "sign": [config.get_category("os"), config.get_category("ss")], "tau2": [config.get_category("iso"), config.get_category("noniso")], } - create_category_combinations(config, main_categories, name_fn, kwargs_fn) + create_category_combinations( + config=config, + categories=main_categories, + name_fn=name_fn, + kwargs_fn=functools.partial(kwargs_fn, add_qcd_group=True), + ) # control categories control_categories = { @@ -82,6 +87,11 @@ def kwargs_fn(categories, add_qcd_group=True): # kinematic regions in the middle (to be extended) "kin": [config.get_category("incl"), config.get_category("2j")], # relative sign last - "sign": [config.get_category("os"), config.get_category("ss")], + "sign": [config.get_category("os")], } - create_category_combinations(config, control_categories, name_fn, functools.partial(kwargs_fn, add_qcd_group=False)) + create_category_combinations( + config=config, + categories=control_categories, + name_fn=name_fn, + kwargs_fn=functools.partial(kwargs_fn, add_qcd_group=False), + ) diff --git a/hbt/config/variables.py b/hbt/config/variables.py index 58da3b01..d3ec4994 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -87,26 +87,24 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="met_phi", - expression="MET.phi", + expression="PuppiMET.phi", null_value=EMPTY_FLOAT, binning=(33, -3.3, 3.3), x_title=r"MET $\phi$", ) - config.add_variable( - name="e_pt", - expression="Electron.pt", + name="e1_pt", + expression="Electron.pt[:, 0]", null_value=EMPTY_FLOAT, binning=(40, 0, 400), - x_title=r"Electron p$_{T}$", + x_title=r"Leading electron p$_{T}$", ) - config.add_variable( name="mu1_pt", expression="Muon.pt[:,0]", null_value=EMPTY_FLOAT, binning=(40, 0, 400), - x_title=r"Muon 1 p$_{T}$", + x_title=r"Leading muon p$_{T}$", ) # weights diff --git a/hbt/production/weights.py b/hbt/production/weights.py index f4777e74..716cbc96 100644 --- a/hbt/production/weights.py +++ b/hbt/production/weights.py @@ -6,6 +6,7 @@ from columnflow.production import Producer, producer from columnflow.production.cms.pileup import pu_weight +from columnflow.production.cms.pdf import pdf_weights from columnflow.util import maybe_import, safe_div, InsertableDict from columnflow.columnar_util import set_ak_column @@ -153,6 +154,11 @@ def normalized_pdf_weight_setup( } +# variation of the pdf weights producer that does not store up and down shifted weights +# but that stores all available pdf weights for the full treatment based on histograms +all_pdf_weights = pdf_weights.derive("all_pdf_weights", cls_dict={"store_all_weights": True}) + + @producer( 
uses={"murmuf_weight{,_up,_down}"}, produces={"normalized_murmuf_weight{,_up,_down}"}, From 3895d976662dad85441c39411e319875a1fd7fdc Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Mon, 13 Jan 2025 09:28:16 +0100 Subject: [PATCH 13/20] Simplify trigger bits, adapt to cf. --- hbt/config/configs_hbt.py | 42 +++++++++---- hbt/config/triggers.py | 120 +++++++++++++++++--------------------- hbt/config/util.py | 44 +++++++++++++- hbt/production/btag.py | 13 +++-- hbt/production/weights.py | 24 +++----- hbt/util.py | 2 +- law.cfg | 4 +- modules/cmsdb | 2 +- modules/columnflow | 2 +- 9 files changed, 148 insertions(+), 105 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 06581b14..73b70e73 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -7,7 +7,6 @@ from __future__ import annotations import os -import re import itertools import functools @@ -167,6 +166,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] proc.add_tag({"ttbar", "tt"}) if process_name.startswith("dy_"): proc.add_tag("dy") + if process_name.startswith("w_lnu_"): + proc.add_tag("w_lnu") # add the process cfg.add_process(proc) @@ -293,10 +294,10 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] "ww_pythia", # vvv - "zzz_amcatnlo", - "wzz_amcatnlo", - "wwz_4f_amcatnlo", "www_4f_amcatnlo", + "wwz_4f_amcatnlo", + "wzz_amcatnlo", + "zzz_amcatnlo", # single H "h_ggf_htt_powheg", @@ -319,16 +320,16 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # data *if_era(year=2022, tag="preEE", values=[ - f"data_{stream}_{period}" for stream in ["mu", "e", "tau", "met"] for period in "cd" + f"data_{stream}_{period}" for stream in ["mu", "e", "tau"] for period in "cd" ]), *if_era(year=2022, tag="postEE", values=[ - f"data_{stream}_{period}" for stream in ["mu", "e", "tau", "met"] for period in "efg" + f"data_{stream}_{period}" for stream in ["mu", "e", "tau"] for period in "efg" ]), *if_era(year=2023, tag="preBPix", values=[ - f"data_{stream}_c{v}" for stream in ["mu", "e", "tau", "met"] for v in "1234" + f"data_{stream}_c{v}" for stream in ["mu", "e", "tau"] for v in "1234" ]), *if_era(year=2023, tag="postBPix", values=[ - f"data_{stream}_d{v}" for stream in ["mu", "e", "tau", "met"] for v in "12" + f"data_{stream}_d{v}" for stream in ["mu", "e", "tau"] for v in "12" ]), ] for dataset_name in dataset_names: @@ -352,8 +353,19 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] dataset.add_tag({"has_top", "single_top", "st"}) if dataset.name.startswith("dy_"): dataset.add_tag("dy") - if re.match(r"^(ww|wz|zz)_.*_pythia$", dataset.name): + if dataset.name.startswith("w_lnu_"): + dataset.add_tag("w_lnu") + # datasets that are known to have no lhe info at all + if law.util.multi_match(dataset.name, [ + r"^(ww|wz|zz)_.*pythia$", + r"^tt(w|z)_.*amcatnlo$", + r"^hh_ggf_hbb_htt_kl[^1]+_kt1_powheg$", # only SM model has LHE weighs, TODO: in all configs? 
+ ]): dataset.add_tag("no_lhe_weights") + # datasets that are allowed to contain some events with missing lhe infos + # (known to happen for amcatnlo) + if dataset.name.endswith("_amcatnlo"): + dataset.add_tag("partial_lhe_weights") if dataset_name.startswith("hh_"): dataset.add_tag("signal") dataset.add_tag("nonresonant_signal") @@ -409,6 +421,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] cfg.x.process_groups = { "signals": [ "hh_ggf_hbb_htt_kl1_kt1", + "hh_vbf_hbb_htt_kv1_k2v1_kl1", ], "signals_ggf": [ "hh_ggf_hbb_htt_kl0_kt1", @@ -464,7 +477,11 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # dataset groups for conveniently looping over certain datasets # (used in wrapper_factory and during plotting) - cfg.x.dataset_groups = {} + cfg.x.dataset_groups = { + "dy": [dataset.name for dataset in cfg.datasets if dataset.has_tag("dy")], + "w_lnu": [dataset.name for dataset in cfg.datasets if dataset.has_tag("w_lnu")], + # TODO: resonant (mostly for excluding them) + } # category groups for conveniently looping over certain categories # (used during plotting) @@ -485,6 +502,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] } cfg.x.default_selector_steps = "default" + # plotting overwrites cfg.x.custom_style_config_groups = { "small_legend": { "legend_cfg": {"ncols": 2, "fontsize": 16, "columnspacing": 0.6}, @@ -1280,10 +1298,10 @@ def get_dataset_lfns( lfn_base = dir_cls(path, fs=fs) # loop though files and interpret paths as lfns - return [ + return sorted( "/" + lfn_base.child(basename, type="f").path.lstrip("/") for basename in lfn_base.listdir(pattern="*.root") - ] + ) # define the lfn retrieval function cfg.x.get_dataset_lfns = get_dataset_lfns diff --git a/hbt/config/triggers.py b/hbt/config/triggers.py index 0fc77340..6f882d3b 100644 --- a/hbt/config/triggers.py +++ b/hbt/config/triggers.py @@ -66,27 +66,12 @@ from __future__ import annotations import functools -from dataclasses import dataclass import order as od from columnflow.util import DotDict -from columnflow.types import ClassVar -from hbt.config.util import Trigger, TriggerLeg - - -@dataclass -class Bits: - v12: int | None = None - v14: int | None = None - - supported_versions: ClassVar[set[int]] = {12, 14} - - def get(self, nano_version: int) -> int: - if nano_version not in self.supported_versions: - raise ValueError(f"nano_version {nano_version} not supported") - return getattr(self, f"v{nano_version}") +from hbt.config.util import Trigger, TriggerLeg, TriggerBits as Bits # use the CCLub names for the trigger bits and improve them when necessary @@ -99,10 +84,10 @@ def get(self, nano_version: int) -> int: # last update in https://github.com/cms-sw/cmssw/blob/CMSSW_14_0_X/PhysicsTools/NanoAOD/python/triggerObjects_cff.py "e": { - "CaloIdLTrackIdLIsoVL": Bits(v12=1, v14=1), - "WPTightTrackIso": Bits(v12=2, v14=2), - "WPLooseTrackIso": Bits(v12=4, v14=4), - "OverlapFilterPFTau": Bits(v12=8, v14=8), + "CaloIdLTrackIdLIsoVL": Bits(v12=1, v14="v12"), + "WPTightTrackIso": Bits(v12=2, v14="v12"), + "WPLooseTrackIso": Bits(v12=4, v14="v12"), + "OverlapFilterPFTau": Bits(v12=8, v14="v12"), "DiElectron": Bits(v12=16), "DiElectronLeg1": Bits(v14=16), "DiElectronLeg2": Bits(v14=32), @@ -120,19 +105,19 @@ def get(self, nano_version: int) -> int: "EleTauPNet": Bits(v14=131072), }, "mu": { - "TrkIsoVVL": Bits(v12=1, v14=1), - "Iso": Bits(v12=2, v14=2), - "OverlapFilterPFTau": Bits(v12=4, v14=4), - "SingleMuon": Bits(v12=8, v14=8), 
- "DiMuon": Bits(v12=16, v14=16), - "MuEle": Bits(v12=32, v14=32), - "MuTau": Bits(v12=64, v14=64), - "TripleMuon": Bits(v12=128, v14=128), - "DiMuonSingleEle": Bits(v12=256, v14=256), - "SingleMuonDiEle": Bits(v12=512, v14=512), - "Mu50": Bits(v12=1024, v14=1024), - "Mu100": Bits(v12=2048, v14=2048), - "SingleMuonSinglePhoton": Bits(v12=4096, v14=4096), + "TrkIsoVVL": Bits(v12=1, v14="v12"), + "Iso": Bits(v12=2, v14="v12"), + "OverlapFilterPFTau": Bits(v12=4, v14="v12"), + "SingleMuon": Bits(v12=8, v14="v12"), + "DiMuon": Bits(v12=16, v14="v12"), + "MuEle": Bits(v12=32, v14="v12"), + "MuTau": Bits(v12=64, v14="v12"), + "TripleMuon": Bits(v12=128, v14="v12"), + "DiMuonSingleEle": Bits(v12=256, v14="v12"), + "SingleMuonDiEle": Bits(v12=512, v14="v12"), + "Mu50": Bits(v12=1024, v14="v12"), + "Mu100": Bits(v12=2048, v14="v12"), + "SingleMuonSinglePhoton": Bits(v12=4096, v14="v12"), "MuTauPNet": Bits(v14=8192), }, "tau": { # general comment: lot of v14 paths contain PNet paths, not available in v12, e.g. OverlapFilterIsoEle @@ -142,7 +127,7 @@ def get(self, nano_version: int) -> int: "Medium": Bits(v14=2), "TightChargedIso": Bits(v12=4), "Tight": Bits(v14=4), - "DeepTau": Bits(v12=8, v14=8), + "DeepTau": Bits(v12=8, v14="v12"), "PNet": Bits(v14=16), "TightOOSCPhotons": Bits(v12=16), "HPS": Bits(v12=32, v14=268435456), @@ -161,7 +146,7 @@ def get(self, nano_version: int) -> int: "VBFpDoublePFTau_run3": Bits(v12=4096), # warning: this trigger bit expects "ChargedIso" in the filter name, this does not correspond to our actual VBF filter name # noqa "DiTau": Bits(v14=2048), "DiPFJetAndDiTau": Bits(v12=8192), - "DiTauAndPFJet": Bits(v12=16384, v14=16384), + "DiTauAndPFJet": Bits(v12=16384, v14="v12"), "DisplacedTau": Bits(v12=32768), "ETauDisplaced": Bits(v14=32768), "MuTauDisplaced": Bits(v14=65536), @@ -182,37 +167,37 @@ def get(self, nano_version: int) -> int: "VBFSingleTau": Bits(v14=1073741824), }, "jet": { - "4PixelOnlyPFCentralJetTightIDPt20": Bits(v12=1, v14=1), - "3PixelOnlyPFCentralJetTightIDPt30": Bits(v12=2, v14=2), - "PFJetFilterTwoC30": Bits(v12=4, v14=4), - "4PFCentralJetTightIDPt30": Bits(v12=8, v14=8), - "4PFCentralJetTightIDPt35": Bits(v12=16, v14=16), - "QuadCentralJet30": Bits(v12=32, v14=32), - "2PixelOnlyPFCentralJetTightIDPt40": Bits(v12=64, v14=64), - "L1sTripleJetVBF_orHTT_orDoubleJet_orSingleJet": Bits(v12=128, v14=128), - "3PFCentralJetTightIDPt40": Bits(v12=256, v14=256), - "3PFCentralJetTightIDPt45": Bits(v12=512, v14=512), - "L1sQuadJetsHT": Bits(v12=1024, v14=1024), - "BTagCaloDeepCSVp17Double": Bits(v12=2048, v14=2048), - "PFCentralJetLooseIDQuad30": Bits(v12=4096, v14=4096), - "1PFCentralJetLooseID75": Bits(v12=8192, v14=8192), - "2PFCentralJetLooseID60": Bits(v12=16384, v14=16384), - "3PFCentralJetLooseID45": Bits(v12=32768, v14=32768), - "4PFCentralJetLooseID40": Bits(v12=65536, v14=65536), - "DoubleTau+Jet": Bits(v12=131072, v14=131072), # v14 also contains PNet paths - "VBFcrossCleanedDeepTauPFTau": Bits(v12=262144, v14=262144), # more general VBFDiTauJets in v14 TODO: change name? # noqa - "VBFcrossCleanedUsingDijetCorr": Bits(v12=524288, v14=524288), # more general VBFSingleTauJets in v14 TODO: change name? 
# noqa - "MonitoringMuon+Tau+Jet": Bits(v12=1048576, v14=1048576), - "2PFCentralJetTightIDPt50": Bits(v12=2097152, v14=2097152), - "1PixelOnlyPFCentralJetTightIDPt60": Bits(v12=4194304, v14=4194304), - "1PFCentralJetTightIDPt70": Bits(v12=8388608, v14=8388608), - "BTagPFDeepJet1p5Single": Bits(v12=16777216, v14=16777216), - "BTagPFDeepJet4p5Triple": Bits(v12=33554432, v14=33554432), - "2BTagSumOR2BTagMeanPaths": Bits(v12=67108864, v14=67108864), - "2/1PixelOnlyPFCentralJetTightIDPt20/50": Bits(v12=134217728, v14=134217728), - "2PFCentralJetTightIDPt30": Bits(v12=268435456, v14=268435456), - "1PFCentralJetTightIDPt60": Bits(v12=536870912, v14=536870912), - "PF2CentralJetPt30PNet2BTagMean0p50": Bits(v12=1073741824, v14=1073741824), + "4PixelOnlyPFCentralJetTightIDPt20": Bits(v12=1, v14="v12"), + "3PixelOnlyPFCentralJetTightIDPt30": Bits(v12=2, v14="v12"), + "PFJetFilterTwoC30": Bits(v12=4, v14="v12"), + "4PFCentralJetTightIDPt30": Bits(v12=8, v14="v12"), + "4PFCentralJetTightIDPt35": Bits(v12=16, v14="v12"), + "QuadCentralJet30": Bits(v12=32, v14="v12"), + "2PixelOnlyPFCentralJetTightIDPt40": Bits(v12=64, v14="v12"), + "L1sTripleJetVBF_orHTT_orDoubleJet_orSingleJet": Bits(v12=128, v14="v12"), + "3PFCentralJetTightIDPt40": Bits(v12=256, v14="v12"), + "3PFCentralJetTightIDPt45": Bits(v12=512, v14="v12"), + "L1sQuadJetsHT": Bits(v12=1024, v14="v12"), + "BTagCaloDeepCSVp17Double": Bits(v12=2048, v14="v12"), + "PFCentralJetLooseIDQuad30": Bits(v12=4096, v14="v12"), + "1PFCentralJetLooseID75": Bits(v12=8192, v14="v12"), + "2PFCentralJetLooseID60": Bits(v12=16384, v14="v12"), + "3PFCentralJetLooseID45": Bits(v12=32768, v14="v12"), + "4PFCentralJetLooseID40": Bits(v12=65536, v14="v12"), + "DoubleTau+Jet": Bits(v12=131072, v14="v12"), # v14 also contains PNet paths + "VBFcrossCleanedDeepTauPFTau": Bits(v12=262144, v14="v12"), # more general VBFDiTauJets in v14 TODO: change name? # noqa + "VBFcrossCleanedUsingDijetCorr": Bits(v12=524288, v14="v12"), # more general VBFSingleTauJets in v14 TODO: change name? 
# noqa + "MonitoringMuon+Tau+Jet": Bits(v12=1048576, v14="v12"), + "2PFCentralJetTightIDPt50": Bits(v12=2097152, v14="v12"), + "1PixelOnlyPFCentralJetTightIDPt60": Bits(v12=4194304, v14="v12"), + "1PFCentralJetTightIDPt70": Bits(v12=8388608, v14="v12"), + "BTagPFDeepJet1p5Single": Bits(v12=16777216, v14="v12"), + "BTagPFDeepJet4p5Triple": Bits(v12=33554432, v14="v12"), + "2BTagSumOR2BTagMeanPaths": Bits(v12=67108864, v14="v12"), + "2/1PixelOnlyPFCentralJetTightIDPt20/50": Bits(v12=134217728, v14="v12"), + "2PFCentralJetTightIDPt30": Bits(v12=268435456, v14="v12"), + "1PFCentralJetTightIDPt60": Bits(v12=536870912, v14="v12"), + "PF2CentralJetPt30PNet2BTagMean0p50": Bits(v12=1073741824, v14="v12"), }, }) @@ -1308,7 +1293,8 @@ def add_triggers_2023(config: od.Config) -> None: # WPTightTrackIso trigger_bits=get_bit_sum_v("e", [ "WPTightTrackIso", - "Tight" if nano_version == 14 else None, + # TODO: Tight not existing, needs fixing + # "Tight" if nano_version == 14 else None, ]), ), ), diff --git a/hbt/config/util.py b/hbt/config/util.py index 9daad8ad..26be7ef9 100644 --- a/hbt/config/util.py +++ b/hbt/config/util.py @@ -6,10 +6,13 @@ from __future__ import annotations +import re +from dataclasses import dataclass + from order import UniqueObject, TagMixin from order.util import typed -from columnflow.types import Callable, Any, Sequence, Hashable +from columnflow.types import Callable, Any, Sequence, Hashable, ClassVar class TriggerLeg(object): @@ -224,3 +227,42 @@ def n_legs(self): def hlt_field(self): # remove the first four "HLT_" characters return self.name[4:] + + +@dataclass +class TriggerBits: + """ + Lightweight container wrapping trigger bits for different versions of NanoAOD. + """ + + v12: int | None = None + v14: int | None = None + + supported_versions: ClassVar[set[int]] = {12, 14} + + def __post_init__(self) -> None: + # versions might be strings such as "v12" that act as references + cre = re.compile(r"^v(\d+)$") + for v in self.supported_versions: + attr = f"v{v}" + # only check strings + if not isinstance((val := getattr(self, attr)), str): + continue + # check format + if not (m := cre.match(val)): + raise ValueError(f"invalid reference {attr} -> {val}") + # check if not circular + ref_v = int(m.group(1)) + if v == ref_v: + raise ValueError(f"reference to same version {attr} -> {val}") + # check type of referred value + ref_val = getattr(self, f"v{ref_v}") + if not isinstance(ref_val, int) and v is not None: + raise ValueError(f"wrong reference value in {attr} -> {val}") + # set it + setattr(self, attr, ref_val) + + def get(self, nano_version: int) -> int: + if nano_version not in self.supported_versions: + raise ValueError(f"nano_version {nano_version} not supported") + return getattr(self, f"v{nano_version}") diff --git a/hbt/production/btag.py b/hbt/production/btag.py index 907da7ec..6a594553 100644 --- a/hbt/production/btag.py +++ b/hbt/production/btag.py @@ -93,24 +93,25 @@ def _normalized_btag_weights_init(self: Producer) -> None: @_normalized_btag_weights.requires def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @_normalized_btag_weights.setup def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: 
dict, reader_targets: InsertableDict) -> None: + # BUG in prod3: some stats fields were missing so skip them for now + return + # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # get the unique process ids in that dataset - key = f"sum_mc_weight_selected_nob_{self.tagger_name}_per_process_and_njet" + key = f"sum_btag_weight_{self.tagger_name}_selected_nob_{self.tagger_name}_per_process_and_njet" self.unique_process_ids = list(map(int, selection_stats[key].keys())) # get the maximum numbers of jets diff --git a/hbt/production/weights.py b/hbt/production/weights.py index 716cbc96..e63a36c2 100644 --- a/hbt/production/weights.py +++ b/hbt/production/weights.py @@ -60,11 +60,9 @@ def normalized_pu_weight_init(self: Producer) -> None: @normalized_pu_weight.requires def normalized_pu_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @@ -78,7 +76,7 @@ def normalized_pu_weight_setup( # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # get the unique process ids in that dataset @@ -126,11 +124,9 @@ def normalized_pdf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Arra @normalized_pdf_weight.requires def normalized_pdf_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @@ -144,7 +140,7 @@ def normalized_pdf_weight_setup( # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # save average weights @@ -180,11 +176,9 @@ def normalized_murmuf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.A @normalized_murmuf_weight.requires def normalized_murmuf_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req( + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( self.task, - tree_index=0, - branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, + branch=-1 if self.task.is_workflow() else 0, ) @@ -198,7 +192,7 @@ def normalized_murmuf_weight_setup( # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", - func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"), + func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), ) # save average weights diff --git a/hbt/util.py b/hbt/util.py index 
0c3055de..0e4d18a5 100644 --- a/hbt/util.py +++ b/hbt/util.py @@ -58,7 +58,7 @@ def IF_DATASET_HAS_LHE_WEIGHTS( if getattr(func, "dataset_inst", None) is None: return self.get() - return None if func.dataset_inst.has_tag("no_lhe_weights") else self.get() + return self.get() if not func.dataset_inst.has_tag("no_lhe_weights") else None @deferred_column diff --git a/law.cfg b/law.cfg index 312a3107..fc6509b6 100644 --- a/law.cfg +++ b/law.cfg @@ -30,6 +30,7 @@ mattermost_user: HH → bb𝛕𝛕 job_file_dir: $CF_JOB_BASE job_file_dir_cleanup: False job_file_dir_mkdtemp: sub_{{task_id}}_XXX +crab_sandbox_name: CMSSW_14_2_1::arch=el9_amd64_gcc12 # @@ -65,6 +66,7 @@ skip_ensure_proxy: False # (resources like memory and disk can also be set in [resources] with more granularity) htcondor_flavor: $CF_HTCONDOR_FLAVOR htcondor_share_software: True +# 2GB -> short "lite" queue, otherwise long "bide" queue on the naf htcondor_memory: 2GB htcondor_disk: 5GB slurm_flavor: $CF_SLURM_FLAVOR @@ -192,7 +194,7 @@ cf.CreateSyncFile: wlcg [resources] # default selection with hhbtag requires more memory -cf.{Select,Reduce}Events__sel_default: htcondor_memory=5GB +cf.{Select,Reduce}Events__sel_default: htcondor_memory=5GB, crab_memory=5000MB # diff --git a/modules/cmsdb b/modules/cmsdb index f2b84f7c..fcdb5d9c 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit f2b84f7cceaea02a3bc5270436369da6562ecd68 +Subproject commit fcdb5d9ca251b178a29ebdecfb1eb66802e9b1ec diff --git a/modules/columnflow b/modules/columnflow index 52207354..dd0655af 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 522073541c68d16938daa1bd1b137664dfc6d332 +Subproject commit dd0655afda8756c99bc434a901929f0a028dacf9 From f31f7690661d140a07bbd3caf6f028355e0d08d0 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Mon, 13 Jan 2025 09:30:42 +0100 Subject: [PATCH 14/20] Port stitching updates from #55. --- hbt/config/configs_hbt.py | 16 ++- hbt/production/processes.py | 258 +++++++++++++++++++++++------------- hbt/selection/default.py | 51 ++++--- hbt/util.py | 11 ++ 4 files changed, 226 insertions(+), 110 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 73b70e73..9a7ab0c3 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -473,7 +473,21 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] }, } # w+jets - # TODO: add + cfg.x.w_lnu_stitching = { + "incl": { + "inclusive_dataset": cfg.datasets.n.w_lnu_amcatnlo, + "leaf_processes": [ + # the following processes cover the full njet and pt phasespace + procs.n.w_lnu_0j, + *( + procs.get(f"w_lnu_{nj}j_pt{pt}") + for nj in [1, 2] + for pt in ["0to40", "40to100", "100to200", "200to400", "400to600", "600toinf"] + ), + procs.n.w_lnu_ge3j, + ], + }, + } # dataset groups for conveniently looping over certain datasets # (used in wrapper_factory and during plotting) diff --git a/hbt/production/processes.py b/hbt/production/processes.py index 17e39011..200bd646 100644 --- a/hbt/production/processes.py +++ b/hbt/production/processes.py @@ -4,15 +4,18 @@ Process ID producer relevant for the stitching of the DY samples. 
""" -import functools +from __future__ import annotations + +import abc import law +import order -from columnflow.production import Producer, producer +from columnflow.production import Producer from columnflow.util import maybe_import, InsertableDict -from columnflow.columnar_util import set_ak_column +from columnflow.columnar_util import set_ak_column, Route -from hbt.util import IF_DATASET_IS_DY +from hbt.util import IF_DATASET_IS_DY, IF_DATASET_IS_W_LNU np = maybe_import("numpy") ak = maybe_import("awkward") @@ -25,88 +28,160 @@ NJetsRange = tuple[int, int] PtRange = tuple[float, float] -set_ak_column_i64 = functools.partial(set_ak_column, value_type=np.int64) +class stitched_process_ids(Producer): + """General class to calculate process ids for stitched samples. -@producer( - uses={IF_DATASET_IS_DY("LHE.NpNLO", "LHE.Vpt")}, - produces={IF_DATASET_IS_DY("process_id")}, -) -def process_ids_dy(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - """ - Assigns each dy event a single process id, based on the number of jets and the di-lepton pt of - the LHE record. This is used for the stitching of the DY samples. + Individual producers should derive from this class and set the following attributes: + + :param id_table: scipy lookup table mapping processes variables (using key_func) to process ids + :param key_func: function to generate keys for the lookup, receiving values of stitching columns + :param stitching_columns: list of observables to use for stitching + :param cross_check_translation_dict: dictionary to translate stitching columns to auxiliary + fields of process objects, used for cross checking the validity of obtained ranges + :param include_condition: condition for including stitching columns in used columns """ - # as always, we assume that each dataset has exactly one process associated to it - if len(self.dataset_inst.processes) != 1: - raise NotImplementedError( - f"dataset {self.dataset_inst.name} has {len(self.dataset_inst.processes)} processes " - "assigned, which is not yet implemented", - ) - process_inst = self.dataset_inst.processes.get_first() - - # get the number of nlo jets and the di-lepton pt - njets = events.LHE.NpNLO - pt = events.LHE.Vpt - - # raise a warning if a datasets was already created for a specific "bin" (leaf process), - # but actually does not fit - njets_range = process_inst.x("njets", None) - if njets_range is not None: - outliers = (njets < njets_range[0]) | (njets >= njets_range[1]) - if ak.any(outliers): - logger.warning( - f"dataset {self.dataset_inst.name} is meant to contain njet values in the range " - f"[{njets_range[0]}, {njets_range[0]}), but found {ak.sum(outliers)} events " - "outside this range", + + @abc.abstractproperty + def id_table(self) -> sp.sparse._lil.lil_matrix: + # must be overwritten by inheriting classes + ... + + @abc.abstractmethod + def key_func(self, *values: ak.Array) -> int: + # must be overwritten by inheriting classes + ... + + @abc.abstractproperty + def cross_check_translation_dict(self) -> dict[str, str]: + # must be overwritten by inheriting classes + ... 
+
+    def init_func(self, *args, **kwargs):
+        # if there is an include_condition set, apply it to both used and produced columns
+        cond = lambda args: {self.include_condition(*args)} if self.include_condition else {*args}
+        self.uses |= cond(self.stitching_columns or [])
+        self.produces |= cond(["process_id"])
+
+    def call_func(self, events: ak.Array, **kwargs) -> ak.Array:
+        """
+        Assigns each event a single process id, based on the stitching values extracted per event.
+        This id can be used for the stitching of the respective datasets downstream.
+        """
+        # ensure that each dataset has exactly one process associated to it
+        if len(self.dataset_inst.processes) != 1:
+            raise NotImplementedError(
+                f"dataset {self.dataset_inst.name} has {len(self.dataset_inst.processes)} processes "
+                "assigned, which is not yet implemented",
             )
-    pt_range = process_inst.x("ptll", None)
-    if pt_range is not None:
-        outliers = (pt < pt_range[0]) | (pt >= pt_range[1])
-        if ak.any(outliers):
-            logger.warning(
-                f"dataset {self.dataset_inst.name} is meant to contain ptll values in the range "
-                f"[{pt_range[0]}, {pt_range[1]}), but found {ak.sum(outliers)} events outside this "
-                "range",
+        process_inst = self.dataset_inst.processes.get_first()
+
+        # get stitching observables
+        stitching_values = [Route(obs).apply(events) for obs in self.stitching_columns]
+
+        # run the cross check function if defined
+        if callable(self.stitching_range_cross_check):
+            self.stitching_range_cross_check(process_inst, stitching_values)
+
+        # lookup the id and check for invalid values
+        process_ids = np.squeeze(np.asarray(self.id_table[self.key_func(*stitching_values)].todense()))
+        invalid_mask = process_ids == 0
+        if ak.any(invalid_mask):
+            raise ValueError(
+                f"found {sum(invalid_mask)} events that could not be assigned to a process",
             )
-    # lookup the id and check for invalid values
-    process_ids = np.squeeze(np.asarray(self.id_table[self.key_func(njets, pt)].todense()))
-    invalid_mask = process_ids == 0
-    if ak.any(invalid_mask):
-        raise ValueError(
-            f"found {sum(invalid_mask)} dy events that could not be assigned to a process",
-        )
-
-    # store them
-    events = set_ak_column_i64(events, "process_id", process_ids)
-
-    return events
-
-
-@process_ids_dy.setup
-def process_ids_dy_setup(
-    self: Producer,
-    reqs: dict,
-    inputs: dict,
-    reader_targets: InsertableDict,
-) -> None:
-    # define stitching ranges for the DY datasets covered by this producer's dy_inclusive_dataset
-    stitching_ranges: dict[NJetsRange, list[PtRange]] = {}
-    for proc in self.dy_leaf_processes:
-        njets = proc.x.njets
-        stitching_ranges.setdefault(njets, [])
-        if proc.has_aux("ptll"):
-            stitching_ranges[njets].append(proc.x.ptll)
-
-    # sort by the first element of the ptll range
-    sorted_stitching_ranges: list[tuple[NJetsRange, list[PtRange]]] = [
-        (nj_range, sorted(stitching_ranges[nj_range], key=lambda ptll_range: ptll_range[0]))
-        for nj_range in sorted(stitching_ranges.keys(), key=lambda nj_range: nj_range[0])
-    ]
-
-    # define a key function that maps njets and pt to a unique key for use in a lookup table
-    def key_func(njets, pt):
+        # store them
+        events = set_ak_column(events, "process_id", process_ids, value_type=np.int64)
+
+        return events
+
+    def stitching_range_cross_check(
+        self: Producer,
+        process_inst: order.Process,
+        stitching_values: list[ak.Array],
+    ) -> None:
+        # define lookup for stitching observable -> process auxiliary values to compare with
+        # raise a warning if a dataset was already created for a specific "bin" (leaf process),
+        # but
actually does not fit
+        for column, values in zip(self.stitching_columns, stitching_values):
+            aux_name = self.cross_check_translation_dict[str(column)]
+            if not process_inst.has_aux(aux_name):
+                continue
+            aux_min, aux_max = process_inst.x(aux_name)
+            outliers = (values < aux_min) | (values >= aux_max)
+            if ak.any(outliers):
+                logger.warning(
+                    f"dataset {self.dataset_inst.name} is meant to contain {aux_name} values in "
+                    f"the range [{aux_min}, {aux_max}), but found {ak.sum(outliers)} events "
+                    "outside this range",
+                )
+
+
+class stitched_process_ids_nj_pt(stitched_process_ids):
+    """
+    Process identifier for subprocesses spanned by a jet multiplicity and an optional pt range, such
+    as DY or W->lnu, which have (e.g.) "*_1j" as well as "*_1j_pt100to200" subprocesses.
+    """
+
+    # id table is set during setup, create a non-abstract class member in the meantime
+    id_table = None
+
+    # required aux fields
+    njets_aux = "njets"
+    pt_aux = "ptll"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # set during setup_func
+        self.sorted_stitching_ranges: list[tuple[NJetsRange, list[PtRange]]]
+
+        # check that aux fields are present in cross_check_translation_dict
+        for field in (self.njets_aux, self.pt_aux):
+            if field not in self.cross_check_translation_dict.values():
+                raise ValueError(f"field {field} must be present in cross_check_translation_dict")
+
+    @abc.abstractproperty
+    def leaf_processes(self) -> list[order.Process]:
+        # must be overwritten by inheriting classes
+        ...
+
+    def setup_func(
+        self,
+        reqs: dict,
+        inputs: dict,
+        reader_targets: InsertableDict,
+    ) -> None:
+        # define stitching ranges for the datasets covered by this producer's inclusive dataset
+        stitching_ranges: dict[NJetsRange, list[PtRange]] = {}
+        for proc in self.leaf_processes:
+            njets = proc.x(self.njets_aux)
+            stitching_ranges.setdefault(njets, [])
+            if proc.has_aux(self.pt_aux):
+                stitching_ranges[njets].append(proc.x(self.pt_aux))
+
+        # sort by the first element of the ptll range
+        self.sorted_stitching_ranges = [
+            (nj_range, sorted(stitching_ranges[nj_range], key=lambda ptll_range: ptll_range[0]))
+            for nj_range in sorted(stitching_ranges.keys(), key=lambda nj_range: nj_range[0])
+        ]
+
+        # define the lookup table
+        max_nj_bin = len(self.sorted_stitching_ranges)
+        max_pt_bin = max(map(len, stitching_ranges.values()))
+        self.id_table = sp.sparse.lil_matrix((max_nj_bin + 1, max_pt_bin + 1), dtype=np.int64)
+
+        # fill it
+        for proc in self.leaf_processes:
+            key = self.key_func(proc.x(self.njets_aux)[0], proc.x(self.pt_aux, [-1])[0])
+            self.id_table[key] = proc.id
+
+    def key_func(
+        self,
+        njets: int | np.ndarray,
+        pt: int | float | np.ndarray,
+    ) -> tuple[int, int] | tuple[np.ndarray, np.ndarray]:
         # potentially convert single values into arrays
         single = False
         if isinstance(njets, int):
@@ -118,7 +193,7 @@ def key_func(njets, pt):
         # map into bins (index 0 means no binning)
         nj_bins = np.zeros(len(njets), dtype=np.int32)
         pt_bins = np.zeros(len(pt), dtype=np.int32)
-        for nj_bin, (nj_range, pt_ranges) in enumerate(sorted_stitching_ranges, 1):
+        for nj_bin, (nj_range, pt_ranges) in enumerate(self.sorted_stitching_ranges, 1):
             # nj_bin
             nj_mask = (nj_range[0] <= njets) & (njets < nj_range[1])
             nj_bins[nj_mask] = nj_bin
@@ -129,14 +204,17 @@

         return (nj_bins[0], pt_bins[0]) if single else (nj_bins, pt_bins)

-        self.key_func = key_func
-        # define the lookup table
-        max_nj_bin = len(sorted_stitching_ranges)
-        max_pt_bin = max(map(len, stitching_ranges.values()))
-        self.id_table = sp.sparse.lil_matrix((max_nj_bin + 1, max_pt_bin + 1), dtype=np.int64)

+process_ids_dy = stitched_process_ids_nj_pt.derive("process_ids_dy", cls_dict={
+    "stitching_columns": ["LHE.NpNLO", "LHE.Vpt"],
+    "cross_check_translation_dict": {"LHE.NpNLO": "njets", "LHE.Vpt": "ptll"},
+    "include_condition": IF_DATASET_IS_DY,
+    # still misses leaf_processes, must be set dynamically
+})

-        # fill it
-        for proc in self.dy_leaf_processes:
-            key = key_func(proc.x.njets[0], proc.x("ptll", [-1])[0])
-            self.id_table[key] = proc.id
+process_ids_w_lnu = stitched_process_ids_nj_pt.derive("process_ids_w_lnu", cls_dict={
+    "stitching_columns": ["LHE.NpNLO", "LHE.Vpt"],
+    "cross_check_translation_dict": {"LHE.NpNLO": "njets", "LHE.Vpt": "ptll"},
+    "include_condition": IF_DATASET_IS_W_LNU,
+    # still misses leaf_processes, must be set dynamically
+})
diff --git a/hbt/selection/default.py b/hbt/selection/default.py
index b0957136..039ec45e 100644
--- a/hbt/selection/default.py
+++ b/hbt/selection/default.py
@@ -27,7 +27,7 @@
 from hbt.selection.trigger import trigger_selection
 from hbt.selection.lepton import lepton_selection
 from hbt.selection.jet import jet_selection
-from hbt.production.processes import process_ids_dy
+import hbt.production.processes as process_producers
 from hbt.production.btag import btag_weights_deepjet, btag_weights_pnet
 from hbt.production.features import cutflow_features
 from hbt.production.patches import patch_ecalBadCalibFilter
@@ -148,6 +148,8 @@ def default(
     # create process ids
     if self.process_ids_dy is not None:
         events = self[self.process_ids_dy](events, **kwargs)
+    elif self.process_ids_w_lnu is not None:
+        events = self[self.process_ids_w_lnu](events, **kwargs)
     else:
         events = self[process_ids](events, **kwargs)

@@ -189,24 +191,33 @@ def default_init(self: Selector) -> None:
     if getattr(self, "dataset_inst", None) is None:
         return

-    self.process_ids_dy: process_ids_dy | None = None
-    if self.dataset_inst.has_tag("dy"):
-        # check if this dataset is covered by any dy id producer
-        for name, dy_cfg in self.config_inst.x.dy_stitching.items():
-            dataset_inst = dy_cfg["inclusive_dataset"]
-            # the dataset is "covered" if its process is a subprocess of that of the dy dataset
-            if dataset_inst.has_process(self.dataset_inst.processes.get_first()):
-                self.process_ids_dy = process_ids_dy.derive(f"process_ids_dy_{name}", cls_dict={
-                    "dy_inclusive_dataset": dataset_inst,
-                    "dy_leaf_processes": dy_cfg["leaf_processes"],
-                })
-
-                # add it as a dependency
-                self.uses.add(self.process_ids_dy)
-                self.produces.add(self.process_ids_dy)
-
-                # stop after the first match
-                break
+    # build and store derived process id producers
+    for tag in ("dy", "w_lnu"):
+        prod_name = f"process_ids_{tag}"
+        setattr(self, prod_name, None)
+        if not self.dataset_inst.has_tag(tag):
+            continue
+        # check if the producer was already created and saved in the config
+        if (prod := self.config_inst.x(prod_name, None)) is None:
+            # check if this dataset is covered by any stitching id producer
+            for stitch_name, cfg in self.config_inst.x(f"{tag}_stitching").items():
+                incl_dataset_inst = cfg["inclusive_dataset"]
+                # the dataset is "covered" if its process is a subprocess of that of the inclusive dataset
+                if incl_dataset_inst.has_process(self.dataset_inst.processes.get_first()):
+                    base_prod = getattr(process_producers, prod_name)
+                    prod = base_prod.derive(f"{prod_name}_{stitch_name}", cls_dict={
+                        "leaf_processes": cfg["leaf_processes"],
+                    })
+                    # cache it
+                    self.config_inst.set_aux(prod_name, prod)
+                    # stop after the first match
+                    break
+        if prod is not None:
+            # add it as a dependency
+            self.uses.add(prod)
+            self.produces.add(prod)
+            # save it as an attribute
+            setattr(self, prod_name, prod)


 empty = default.derive("empty", cls_dict={})
@@ -288,6 +299,8 @@ def empty_call(
     # create process ids
     if self.process_ids_dy is not None:
         events = self[self.process_ids_dy](events, **kwargs)
+    elif self.process_ids_w_lnu is not None:
+        events = self[self.process_ids_w_lnu](events, **kwargs)
     else:
         events = self[process_ids](events, **kwargs)

diff --git a/hbt/util.py b/hbt/util.py
index 0e4d18a5..20f37848 100644
--- a/hbt/util.py
+++ b/hbt/util.py
@@ -72,6 +72,17 @@ def IF_DATASET_IS_DY(
     return self.get() if func.dataset_inst.has_tag("dy") else None


+@deferred_column
+def IF_DATASET_IS_W_LNU(
+    self: ArrayFunction.DeferredColumn,
+    func: ArrayFunction,
+) -> Any | set[Any]:
+    if getattr(func, "dataset_inst", None) is None:
+        return self.get()
+
+    return self.get() if func.dataset_inst.has_tag("w_lnu") else None
+
+
 def hash_events(arr: np.ndarray) -> np.ndarray:
     """
     Helper function to create a hash value from the event, run and luminosityBlock columns.

From 2efea55d7e1516f4cd29f89179d7ffe71337a169 Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Mon, 13 Jan 2025 15:06:31 +0100
Subject: [PATCH 15/20] Adapt to cf changes.

---
 hbt/config/configs_hbt.py | 1 +
 hbt/production/btag.py | 40 +++++++++++++++++---------------
 hbt/selection/default.py | 27 ++++++++++++++++++-----
 law.cfg | 4 +++-
 modules/columnflow | 2 +-
 setup.sh | 45 ++++++++++++++++++++--------------------
 6 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
index 9a7ab0c3..d329f105 100644
--- a/hbt/config/configs_hbt.py
+++ b/hbt/config/configs_hbt.py
@@ -441,6 +441,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
             "ewk",
         ]),
         "dy_split": [
+            # TODO
             # "dy_m4to10", "dy_m10to50", "dy_m50toinf",
             # "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j",
             # "dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", "dy_m50toinf_1j_pt200to400",
diff --git a/hbt/production/btag.py b/hbt/production/btag.py
index 6a594553..168ddaa6 100644
--- a/hbt/production/btag.py
+++ b/hbt/production/btag.py
@@ -47,23 +47,29 @@ def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.A
         if not weight_name.startswith(self.weight_name):
             continue

-        # create a weight vectors starting with ones for both weight variations, i.e.,
-        # nomalization per pid and normalization per pid and jet multiplicity
-        norm_weight_per_pid = np.ones(len(events), dtype=np.float32)
-        norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32)
-
-        # fill weights with a new mask per unique process id (mostly just one)
-        for pid in self.unique_process_ids:
-            pid_mask = events.process_id == pid
-            # single value
-            norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid]
-            # lookup table
-            n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1))
-            norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets]
-
-        # multiply with actual weight
-        norm_weight_per_pid = norm_weight_per_pid * events[weight_name]
-        norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name]
+        # BUG in prod3: some stats fields were missing so skip them for now
+        # # create weight vectors starting with ones for both weight variations, i.e.,
+        # # normalization per pid and normalization per pid and jet multiplicity
+        # norm_weight_per_pid = np.ones(len(events),
dtype=np.float32) + # norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32) + + # # fill weights with a new mask per unique process id (mostly just one) + # for pid in self.unique_process_ids: + # pid_mask = events.process_id == pid + # # single value + # norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] + # # lookup table + # n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1)) + # norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets] + + # # multiply with actual weight + # norm_weight_per_pid = norm_weight_per_pid * events[weight_name] + # norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name] + + # fake values + from columnflow.columnar_util import full_like + norm_weight_per_pid = full_like(events.event, 1.0, dtype=np.float32) + norm_weight_per_pid_njet = norm_weight_per_pid # store them events = set_ak_column_f32(events, f"normalized_{weight_name}", norm_weight_per_pid) diff --git a/hbt/selection/default.py b/hbt/selection/default.py index 039ec45e..7f09d2f0 100644 --- a/hbt/selection/default.py +++ b/hbt/selection/default.py @@ -120,7 +120,13 @@ def default( # pdf weights if self.has_dep(pdf_weights): - events = self[pdf_weights](events, outlier_log_mode="debug", **kwargs) + events = self[pdf_weights]( + events, + outlier_log_mode="debug", + # allow some datasets to contain a few events with missing lhe infos + invalid_weights_action="ignore" if self.dataset_inst.has_tag("partial_lhe_weights") else "raise", + **kwargs, + ) # renormalization/factorization scale weights if self.has_dep(murmuf_weights): @@ -181,6 +187,7 @@ def event_sel_nob(btag_weight_cls): "nob_pnet": event_sel_nob(btag_weights_pnet) if self.has_dep(btag_weights_pnet) else None, }, njets=results.x.n_central_jets, + **kwargs, ) return events, results @@ -326,6 +333,7 @@ def empty_call( "nob_pnet": results.event if self.has_dep(btag_weights_pnet) else None, }, njets=ak.num(events.Jet, axis=1), + **kwargs, ) return events, results @@ -360,6 +368,9 @@ def setup_and_increment_stats( event_sel_variations = {} event_sel_variations = {n: s for n, s in event_sel_variations.items() if s is not None} + # when a shift was requested, skip all other systematic variations + skip_shifts = self.global_shift_inst != "nominal" + # start creating a weight, group and group combination map weight_map = { "num_events": Ellipsis, @@ -379,18 +390,17 @@ def setup_and_increment_stats( # pu weights with variations for route in sorted(self[pu_weight].produced_columns): - name = str(route) - weight_map[f"sum_mc_weight_{name}"] = (events.mc_weight * events[name], Ellipsis) + weight_map[f"sum_mc_weight_{route}"] = (events.mc_weight * route.apply(events), Ellipsis) # pdf weights with variations if self.has_dep(pdf_weights): - for v in ["", "_up", "_down"]: + for v in (("",) if skip_shifts else ("", "_up", "_down")): weight_map[f"sum_pdf_weight{v}"] = events[f"pdf_weight{v}"] weight_map[f"sum_pdf_weight{v}_selected"] = (events[f"pdf_weight{v}"], event_sel) # mur/muf weights with variations if self.has_dep(murmuf_weights): - for v in ["", "_up", "_down"]: + for v in (("",) if skip_shifts else ("", "_up", "_down")): weight_map[f"sum_murmuf_weight{v}"] = events[f"murmuf_weight{v}"] weight_map[f"sum_murmuf_weight{v}_selected"] = (events[f"murmuf_weight{v}"], event_sel) @@ -402,6 +412,8 @@ def setup_and_increment_stats( weight_name = str(route) if not weight_name.startswith(prod.weight_name): continue + if skip_shifts and weight_name.endswith(("_up", "_down")): + 
continue
             weight_map[f"sum_{weight_name}"] = events[weight_name]
             weight_map[f"sum_{weight_name}_selected"] = (events[weight_name], event_sel)
             for var_name, var_sel in event_sel_variations.items():
@@ -427,6 +439,10 @@ def setup_and_increment_stats(
     # combinations
     group_combinations.append(("process", "njet"))

+    def skip_func(weight_name: str, group_names: list[str]) -> bool:
+        # TODO: add combinations that are not needed here
+        return False
+
     return self[increment_stats](
         events,
         results,
@@ -434,5 +450,6 @@
         weight_map=weight_map,
         group_map=group_map,
         group_combinations=group_combinations,
+        skip_func=skip_func,
         **kwargs,
     )
diff --git a/law.cfg b/law.cfg
index fc6509b6..cba7c5fc 100644
--- a/law.cfg
+++ b/law.cfg
@@ -188,7 +188,9 @@ cf.CreateSyncFile: wlcg

 [versions]

-# none yet
+# for first plots (13.1.2025)
+22pre_v14__cf.CalibrateEvents: prod3
+22pre_v14__cf.{SelectEvents,MergeSelectionStats,ReduceEvents,MergeReductionStats,ProvideReducedEvents}: prod3

 [resources]
diff --git a/modules/columnflow b/modules/columnflow
index dd0655af..2a9b095d 160000
--- a/modules/columnflow
+++ b/modules/columnflow
@@ -1 +1 @@
-Subproject commit dd0655afda8756c99bc434a901929f0a028dacf9
+Subproject commit 2a9b095d5bd067d06f3463a08edf1be84026d329
diff --git a/setup.sh b/setup.sh
index 977613d2..0e2d3e14 100644
--- a/setup.sh
+++ b/setup.sh
@@ -27,25 +27,34 @@ setup_hbt() {
     #   HBT_SETUP
     #       A flag that is set to 1 after the setup was successful.

+    #
+    # load cf setup helpers
+    #
+
+    local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
+    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
+    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
+    local cf_base="${this_dir}/modules/columnflow"
+    CF_SKIP_SETUP="true" source "${cf_base}/setup.sh" "" || return "$?"
+
+    #
     # prevent repeated setups
-    if [ "${HBT_SETUP}" = "1" ] && [ "${CF_ON_SLURM}" != "1" ]; then
+    #
+
+    cf_export_bool HBT_SETUP
+    if ${HBT_SETUP} && ! ${CF_ON_SLURM}; then
         >&2 echo "the HH -> bbtautau analysis was already successfully set up"
         >&2 echo "re-running the setup requires a new shell"
         return "1"
     fi

-    #
     # prepare local variables
     #

-    local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
-    local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
-    local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
     local orig="${PWD}"
     local setup_name="${1:-default}"
     local setup_is_default="false"
-    local env_is_remote="$( [ "${CF_REMOTE_ENV}" = "1" ] && echo "true" || echo "false" )"
     [ "${setup_name}" = "default" ] && setup_is_default="true"

     # zsh options
@@ -54,7 +63,6 @@
         setopt globdots
     fi

-    #
     # global variables
     # (HBT = hh2bbtautau, CF = columnflow)

@@ -62,18 +70,15 @@

     # start exporting variables
     export HBT_BASE="${this_dir}"
-    export CF_BASE="${this_dir}/modules/columnflow"
+    export CF_BASE="${cf_base}"
     export CF_REPO_BASE="${HBT_BASE}"
     export CF_REPO_BASE_ALIAS="HBT_BASE"
     export CF_SETUP_NAME="${setup_name}"
     export CF_SCHEDULER_HOST="${CF_SCHEDULER_HOST:-naf-cms14.desy.de}"
     export CF_SCHEDULER_PORT="${CF_SCHEDULER_PORT:-8088}"

-    # load cf setup helpers
-    CF_SKIP_SETUP="1" source "${CF_BASE}/setup.sh" "" || return "$?"
-
     # interactive setup
-    if ! ${env_is_remote}; then
+    if !
${CF_REMOTE_ENV}; then
         cf_setup_interactive_body() {
             # the flavor will be cms
             export CF_FLAVOR="cms"
@@ -91,14 +96,12 @@
     export CF_VENV_BASE="${CF_VENV_BASE:-${CF_SOFTWARE_BASE}/venvs}"
     export CF_CMSSW_BASE="${CF_CMSSW_BASE:-${CF_SOFTWARE_BASE}/cmssw}"

-    #
     # common variables
     #

     cf_setup_common_variables || return "$?"

-    #
     # minimal local software setup
     #

@@ -110,23 +113,21 @@
     export PYTHONPATH="${HBT_BASE}:${HBT_BASE}/modules/cmsdb:${PYTHONPATH}"

     # initialize submodules
-    if ! ${env_is_remote} && [ -e "${HBT_BASE}/.git" ]; then
+    if ! ${CF_REMOTE_ENV} && [ -e "${HBT_BASE}/.git" ]; then
         local m
         for m in $( ls -1q "${HBT_BASE}/modules" ); do
             cf_init_submodule "${HBT_BASE}" "modules/${m}"
         done
     fi

-    #
     # git hooks
     #

-    if [ "${CF_LOCAL_ENV}" = "1" ]; then
+    if ${CF_LOCAL_ENV}; then
         cf_setup_git_hooks || return "$?"
     fi

-    #
     # law setup
     #

@@ -135,7 +136,7 @@
     export LAW_CONFIG_FILE="${LAW_CONFIG_FILE:-${HBT_BASE}/law.cfg}"

     # run the indexing when not remote
-    if ! ${env_is_remote} && which law &> /dev/null; then
+    if ! ${CF_REMOTE_ENV} && which law &> /dev/null; then
         # source law's bash completion script
         source "$( law completion )" ""

@@ -148,12 +149,12 @@

     # update the law config file to switch from mirrored to bare wlcg targets
     # as local mounts are typically not available remotely
-    if ${env_is_remote}; then
+    if ${CF_REMOTE_ENV}; then
         sed -i -r 's/(.+\: ?)wlcg_mirrored, local_.+, ?(wlcg_[^\s]+)/\1wlcg, \2/g' "${LAW_CONFIG_FILE}"
     fi

     # finalize
-    export HBT_SETUP="1"
+    export HBT_SETUP="true"
 }

 main() {
@@ -171,6 +172,6 @@
 }

 # entry point
-if [ "${HBT_SKIP_SETUP}" != "1" ]; then
+if [ "${HBT_SKIP_SETUP}" != "true" ]; then
     main "$@"
 fi

From f30eb56c6a79fd115d64192a8081d661b94cf75b Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Mon, 13 Jan 2025 17:57:19 +0100
Subject: [PATCH 16/20] Fix hhbtag score handling (@taaaeen).

---
 hbt/production/hhbtag.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hbt/production/hhbtag.py b/hbt/production/hhbtag.py
index 5e08ff73..cdf4ba8b 100644
--- a/hbt/production/hhbtag.py
+++ b/hbt/production/hhbtag.py
@@ -99,11 +99,11 @@ def split(where):
     even_mask = ak.to_numpy((events[event_mask].event % 2) == 0)
     if ak.sum(even_mask):
         input_features_even = split(even_mask)
-        scores_even = self.hhbtag_model_even(input_features_even)[0].numpy()
+        scores_even = self.hhbtag_model_even(input_features_even).numpy()
         scores[even_mask] = scores_even
     if ak.sum(~even_mask):
         input_features_odd = split(~even_mask)
-        scores_odd = self.hhbtag_model_odd(input_features_odd)[0].numpy()
+        scores_odd = self.hhbtag_model_odd(input_features_odd).numpy()
         scores[~even_mask] = scores_odd

     # remove the scores of padded jets

From a0efe7e08cd7cb2e7936a4c17649173b8824c68d Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Mon, 13 Jan 2025 19:34:24 +0100
Subject: [PATCH 17/20] Update cf.

---
 modules/columnflow | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/columnflow b/modules/columnflow
index 2a9b095d..693f14d6 160000
--- a/modules/columnflow
+++ b/modules/columnflow
@@ -1 +1 @@
-Subproject commit 2a9b095d5bd067d06f3463a08edf1be84026d329
+Subproject commit 693f14d6b188ae42197a824b39548b50a624d667

From dd45b7c5c0b61b31f1b0e6480898bc4606d43c5f Mon Sep 17 00:00:00 2001
From: "Marcel R."
Date: Tue, 14 Jan 2025 08:33:43 +0100
Subject: [PATCH 18/20] Update plots.
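
Variables are now registered through a small add_variable helper that enables
underflow/overflow inclusion by default, and expressions may be callables that
receive the event array, together with an aux entry listing the columns they
need. A minimal sketch of the pattern (a hypothetical "dilep_pt" variable,
assuming coffea behaviors are attached, as for the dilep_mass test below):

    import awkward as ak

    def dilep_pt(events: ak.Array) -> ak.Array:
        # sum the four-vectors of the two leading leptons and return the pt
        leps = ak.concatenate([events.Electron * 1, events.Muon * 1], axis=1)[:, :2]
        return leps.sum(axis=1).pt

    add_variable(
        config,
        name="dilep_pt",
        expression=dilep_pt,
        aux={"inputs": ["{Electron,Muon}.{pt,eta,phi,mass}"]},
        binning=(40, 0.0, 400.0),
        unit="GeV",
        x_title=r"$p_{T}^{ll}$",
    )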
--- hbt/config/categories.py | 22 ++--- hbt/config/configs_hbt.py | 14 ++-- hbt/config/variables.py | 141 ++++++++++++++++++++++++--------- hbt/production/res_networks.py | 8 +- modules/columnflow | 2 +- 5 files changed, 130 insertions(+), 57 deletions(-) diff --git a/hbt/config/categories.py b/hbt/config/categories.py index 7f69a7bf..453f2337 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -16,18 +16,18 @@ def add_categories(config: od.Config) -> None: Adds all categories to a *config*. """ # lepton channels - add_category(config, name="etau", id=1, selection="cat_etau", label=r"$e\tau_{h}$") - add_category(config, name="mutau", id=2, selection="cat_mutau", label=r"$\mu\tau_{h}$") - add_category(config, name="tautau", id=3, selection="cat_tautau", label=r"$\tau_{h}\tau_{h}$") - add_category(config, name="ee", id=4, selection="cat_ee", label=r"$ee$") - add_category(config, name="mumu", id=5, selection="cat_mumu", label=r"$\mu\mu$") - add_category(config, name="emu", id=6, selection="cat_emu", label=r"$e\mu$") + add_category(config, name="etau", id=1, selection="cat_etau", label=config.channels.n.etau.label) + add_category(config, name="mutau", id=2, selection="cat_mutau", label=config.channels.n.mutau.label) + add_category(config, name="tautau", id=3, selection="cat_tautau", label=config.channels.n.tautau.label) + add_category(config, name="ee", id=4, selection="cat_ee", label=config.channels.n.ee.label) + add_category(config, name="mumu", id=5, selection="cat_mumu", label=config.channels.n.mumu.label) + add_category(config, name="emu", id=6, selection="cat_emu", label=config.channels.n.emu.label) # qcd regions - add_category(config, name="os", id=10, selection="cat_os", label="Opposite sign", tags={"os"}) - add_category(config, name="ss", id=11, selection="cat_ss", label="Same sign", tags={"ss"}) - add_category(config, name="iso", id=12, selection="cat_iso", label=r"$\tau_{h,2} isolated$", tags={"iso"}) - add_category(config, name="noniso", id=13, selection="cat_noniso", label=r"$\tau_{h,2} non-isolated$", tags={"noniso"}) # noqa: E501 + add_category(config, name="os", id=10, selection="cat_os", label="OS", tags={"os"}) + add_category(config, name="ss", id=11, selection="cat_ss", label="SS", tags={"ss"}) + add_category(config, name="iso", id=12, selection="cat_iso", label=r"$\tau_{h,2} iso$", tags={"iso"}) + add_category(config, name="noniso", id=13, selection="cat_noniso", label=r"$\tau_{h,2} non-iso$", tags={"noniso"}) # noqa: E501 # kinematic categories add_category(config, name="incl", id=100, selection="cat_incl", label="inclusive") @@ -57,6 +57,8 @@ def kwargs_fn(categories, add_qcd_group=True): "tags": set.union(*[cat.tags for cat in categories.values() if cat]), # auxiliary information "aux": aux, + # label + "label": [cat.label or name for name, cat in categories.items()] or None, } # main analysis categories diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index d329f105..aa9f174e 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -521,6 +521,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] cfg.x.custom_style_config_groups = { "small_legend": { "legend_cfg": {"ncols": 2, "fontsize": 16, "columnspacing": 0.6}, + "annotate_cfg": {"fontsize": 18, "style": "italic"}, + "cms_label": "wip", }, } cfg.x.default_custom_style_config = "small_legend" @@ -1239,12 +1241,12 @@ def add_external(name, value): 
################################################################################################ # channels - cfg.add_channel(name="etau", id=1) - cfg.add_channel(name="mutau", id=2) - cfg.add_channel(name="tautau", id=3) - cfg.add_channel(name="ee", id=4) - cfg.add_channel(name="mumu", id=5) - cfg.add_channel(name="emu", id=6) + cfg.add_channel(name="etau", id=1, label=r"$e\tau_{h}$") + cfg.add_channel(name="mutau", id=2, label=r"$\mu\tau_{h}$") + cfg.add_channel(name="tautau", id=3, label=r"$\tau_{h}\tau_{h}$") + cfg.add_channel(name="ee", id=4, label=r"$ee$") + cfg.add_channel(name="mumu", id=5, label=r"$\mu\mu$") + cfg.add_channel(name="emu", id=6, label=r"$e\mu$") # add categories from hbt.config.categories import add_categories diff --git a/hbt/config/variables.py b/hbt/config/variables.py index d3ec4994..681dced4 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -13,48 +13,55 @@ def add_variables(config: od.Config) -> None: """ Adds all variables to a *config*. """ - config.add_variable( + add_variable( + config, name="event", expression="event", binning=(1, 0.0, 1.0e9), x_title="Event number", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="run", expression="run", binning=(1, 100000.0, 500000.0), x_title="Run number", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="lumi", expression="luminosityBlock", binning=(1, 0.0, 5000.0), x_title="Luminosity block", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="n_jet", expression="n_jet", binning=(11, -0.5, 10.5), x_title="Number of jets", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="n_hhbtag", expression="n_hhbtag", binning=(4, -0.5, 3.5), x_title="Number of HH b-tags", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="ht", binning=[0, 80, 120, 160, 200, 240, 280, 320, 400, 500, 600, 800], unit="GeV", x_title="HT", ) - config.add_variable( + add_variable( + config, name="jet_pt", expression="Jet.pt", null_value=EMPTY_FLOAT, @@ -62,7 +69,8 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title=r"all Jet $p_{T}$", ) - config.add_variable( + add_variable( + config, name="jet1_pt", expression="Jet.pt[:,0]", null_value=EMPTY_FLOAT, @@ -70,14 +78,16 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title=r"Jet 1 $p_{T}$", ) - config.add_variable( + add_variable( + config, name="jet1_eta", expression="Jet.eta[:,0]", null_value=EMPTY_FLOAT, binning=(30, -3.0, 3.0), x_title=r"Jet 1 $\eta$", ) - config.add_variable( + add_variable( + config, name="jet2_pt", expression="Jet.pt[:,1]", null_value=EMPTY_FLOAT, @@ -85,21 +95,24 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title=r"Jet 2 $p_{T}$", ) - config.add_variable( + add_variable( + config, name="met_phi", expression="PuppiMET.phi", null_value=EMPTY_FLOAT, binning=(33, -3.3, 3.3), x_title=r"MET $\phi$", ) - config.add_variable( + add_variable( + config, name="e1_pt", expression="Electron.pt[:, 0]", null_value=EMPTY_FLOAT, binning=(40, 0, 400), x_title=r"Leading electron p$_{T}$", ) - config.add_variable( + add_variable( + config, name="mu1_pt", expression="Muon.pt[:,0]", null_value=EMPTY_FLOAT, @@ -108,37 +121,43 @@ def add_variables(config: od.Config) -> None: ) # weights - config.add_variable( + add_variable( + config, name="mc_weight", expression="mc_weight", binning=(200, -10, 10), x_title="MC weight", ) - config.add_variable( + add_variable( + config, name="pu_weight", expression="pu_weight", 
binning=(40, 0, 2), x_title="Pileup weight", ) - config.add_variable( + add_variable( + config, name="normalized_pu_weight", expression="normalized_pu_weight", binning=(40, 0, 2), x_title="Normalized pileup weight", ) - config.add_variable( + add_variable( + config, name="btag_weight", expression="btag_weight", binning=(60, 0, 3), x_title="b-tag weight", ) - config.add_variable( + add_variable( + config, name="normalized_btag_weight", expression="normalized_btag_weight", binning=(60, 0, 3), x_title="Normalized b-tag weight", ) - config.add_variable( + add_variable( + config, name="normalized_njet_btag_weight", expression="normalized_njet_btag_weight", binning=(60, 0, 3), @@ -146,40 +165,46 @@ def add_variables(config: od.Config) -> None: ) # cutflow variables - config.add_variable( + add_variable( + config, name="cf_njet", expression="cutflow.n_jet", binning=(17, -0.5, 16.5), x_title="Jet multiplicity", discrete_x=True, ) - config.add_variable( + add_variable( + config, name="cf_ht", expression="cutflow.ht", binning=(40, 0.0, 400.0), unit="GeV", x_title=r"$H_{T}$", ) - config.add_variable( + add_variable( + config, name="cf_jet1_pt", expression="cutflow.jet1_pt", binning=(40, 0.0, 400.0), unit="GeV", x_title=r"Jet 1 $p_{T}$", ) - config.add_variable( + add_variable( + config, name="cf_jet1_eta", expression="cutflow.jet1_eta", binning=(40, -5.0, 5.0), x_title=r"Jet 1 $\eta$", ) - config.add_variable( + add_variable( + config, name="cf_jet1_phi", expression="cutflow.jet1_phi", binning=(32, -3.2, 3.2), x_title=r"Jet 1 $\phi$", ) - config.add_variable( + add_variable( + config, name="cf_jet2_pt", expression="cutflow.jet2_pt", binning=(40, 0.0, 400.0), @@ -188,21 +213,24 @@ def add_variables(config: od.Config) -> None: ) # variables of interest - config.add_variable( + add_variable( + config, name="hh_mass", expression="hh.mass", binning=(20, 250, 750.0), unit="GeV", x_title=r"$m_{hh}$", ) - config.add_variable( + add_variable( + config, name="hh_pt", expression="hh.pt", binning=(100, 0, 500.0), unit="GeV", x_title=r"$p_T$", ) - config.add_variable( + add_variable( + config, name="hh_eta", expression="hh.eta", binning=(100, -3.0, 3.0), @@ -210,21 +238,24 @@ def add_variables(config: od.Config) -> None: x_title=r"$\eta$", ) - config.add_variable( + add_variable( + config, name="ditau_mass", expression="diTau.mass", binning=(20, 50, 200.0), unit="GeV", x_title=r"$m_{\tau\tau}$", ) - config.add_variable( + add_variable( + config, name="ditau_pt", expression="diTau.pt", binning=(100, 0, 500.0), unit="GeV", x_title=r"$p_T$", ) - config.add_variable( + add_variable( + config, name="ditau_eta", expression="diTau.eta", binning=(100, -3.0, 3.0), @@ -232,21 +263,24 @@ def add_variables(config: od.Config) -> None: x_title=r"$\eta$", ) - config.add_variable( + add_variable( + config, name="dibjet_mass", expression="diBJet.mass", binning=(20, 0, 500.0), unit="GeV", x_title=r"$m_{bb}$", ) - config.add_variable( + add_variable( + config, name="dibjet_pt", expression="diBJet.pt", binning=(100, 0, 500.0), unit="GeV", x_title=r"$p_T$", ) - config.add_variable( + add_variable( + config, name="dibjet_eta", expression="diBJet.eta", binning=(100, -3.0, 3.0), @@ -254,9 +288,26 @@ def add_variables(config: od.Config) -> None: x_title=r"$\eta$", ) + def dilep_mass_test(events): + import awkward as ak + leps = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] + dilep = leps.sum(axis=1) + return dilep.mass + + add_variable( + config, + name="dilep_mass", + expression=dilep_mass_test, + 
aux={"inputs": ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"]}, + binning=(40, 40, 120), + unit="GeV", + x_title=r"$m_{ll}$", + ) + for proc in ["hh", "tt", "dy"]: # outputs of the resonant pDNN at SM-like mass and spin values - config.add_variable( + add_variable( + config, name=f"res_pdnn_{proc}", expression=f"res_pdnn_s0_m500_{proc}", binning=(25, 0.0, 1.0), @@ -264,16 +315,32 @@ def add_variables(config: od.Config) -> None: ) # outputs of the resonant DNN trained over flat masses - config.add_variable( + add_variable( + config, name=f"res_dnn_{proc}", expression=f"res_dnn_{proc}", binning=(25, 0.0, 1.0), x_title=rf"{proc.upper()} output node, res. DNN", ) - config.add_variable( + add_variable( + config, name=f"res_dnn_{proc}_fine", expression=f"res_dnn_{proc}", binning=(5000, 0.0, 1.0), x_title=rf"{proc.upper()} output node, res. DNN", ) + + +# helper to add a variable to the config with some defaults +def add_variable(config: od.Config, *args, **kwargs) -> od.Variable: + # create the variable + variable = config.add_variable(*args, **kwargs) + + # defaults + if not variable.has_aux("underflow"): + variable.x.underflow = True + if not variable.has_aux("overflow"): + variable.x.overflow = True + + return variable diff --git a/hbt/production/res_networks.py b/hbt/production/res_networks.py index d7484cef..bddd0a41 100644 --- a/hbt/production/res_networks.py +++ b/hbt/production/res_networks.py @@ -12,8 +12,10 @@ import law from columnflow.production import Producer, producer -from columnflow.production.util import attach_coffea_behavior, default_collections -from columnflow.columnar_util import set_ak_column, attach_behavior, flat_np_view, EMPTY_FLOAT +from columnflow.production.util import attach_coffea_behavior +from columnflow.columnar_util import ( + set_ak_column, attach_behavior, flat_np_view, EMPTY_FLOAT, default_coffea_collections, +) from columnflow.util import maybe_import, dev_sandbox, InsertableDict, DotDict np = maybe_import("numpy") @@ -68,7 +70,7 @@ def _res_dnn_evaluation( # ensure coffea behavior events = self[attach_coffea_behavior]( events, - collections={"HHBJet": default_collections["Jet"]}, + collections={"HHBJet": default_coffea_collections["Jet"]}, **kwargs, ) diff --git a/modules/columnflow b/modules/columnflow index 693f14d6..1b01b794 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 693f14d6b188ae42197a824b39548b50a624d667 +Subproject commit 1b01b7948c960474e1fff52c660d03960e6da83a From 92d00546a816b4fd4eca3220fb9b62e80ddbc4c5 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Tue, 14 Jan 2025 17:55:46 +0100 Subject: [PATCH 19/20] Make plot style CMS compatible. 
--- hbt/config/categories.py | 6 ++++- hbt/config/configs_hbt.py | 48 ++++++++++++++++++++++++++----------- hbt/config/styles.py | 50 +++++++++++++++++++++++++++++---------- hbt/selection/jet.py | 15 ++++++++---- modules/columnflow | 2 +- 5 files changed, 89 insertions(+), 32 deletions(-) diff --git a/hbt/config/categories.py b/hbt/config/categories.py index 453f2337..0d8aa4ad 100644 --- a/hbt/config/categories.py +++ b/hbt/config/categories.py @@ -58,7 +58,11 @@ def kwargs_fn(categories, add_qcd_group=True): # auxiliary information "aux": aux, # label - "label": [cat.label or name for name, cat in categories.items()] or None, + "label": [ + cat.label or cat.name + for cat in categories.values() + if cat.name != "os" # os is the default + ] or None, } # main analysis categories diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index aa9f174e..4132e8ab 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -103,6 +103,12 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] label="Multiboson", processes=[procs.n.vv, procs.n.vvv], ) + cfg.add_process( + name="all_v", + id=7996, + label="Multiboson", + processes=[cfg.processes.n.v, cfg.processes.n.multiboson], + ) cfg.add_process( name="tt_multiboson", id=7999, @@ -173,7 +179,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] cfg.add_process(proc) # configure colors, labels, etc - from hbt.config.styles import stylize_processes + from hbt.config.styles import stylize_processes, update_legend_labels stylize_processes(cfg) ################################################################################################ @@ -430,14 +436,13 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] "hh_ggf_hbb_htt_kl5_kt1", ], "backgrounds": (backgrounds := [ - "h", - "tt", "dy", + "tt", "qcd", "st", - "v", - "multiboson", "tt_multiboson", + "all_v", + "h", "ewk", ]), "dy_split": [ @@ -493,9 +498,17 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # dataset groups for conveniently looping over certain datasets # (used in wrapper_factory and during plotting) cfg.x.dataset_groups = { + "data": (data_group := [dataset.name for dataset in cfg.datasets if dataset.is_data]), + "backgrounds": (backgrounds := [ + dataset.name for dataset in cfg.datasets + if dataset.is_mc and not dataset.has_tag("signal") + ]), + "sm_ggf": (sm_ggf_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", *backgrounds]), + "sm": (sm_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph", *backgrounds]), + "sm_ggf_data": data_group + sm_ggf_group, + "sm_data": data_group + sm_group, "dy": [dataset.name for dataset in cfg.datasets if dataset.has_tag("dy")], "w_lnu": [dataset.name for dataset in cfg.datasets if dataset.has_tag("w_lnu")], - # TODO: resonant (mostly for excluding them) } # category groups for conveniently looping over certain categories @@ -513,15 +526,18 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str] # selector step groups for conveniently looping over certain steps # (used in cutflow tasks) cfg.x.selector_step_groups = { - "default": ["json", "trigger", "met_filter", "jet_veto_map", "lepton", "jet", "bjet"], + "default": ["json", "trigger", "met_filter", "jet_veto_map", "lepton", "jet2", "bjet"], } cfg.x.default_selector_steps = "default" # plotting overwrites cfg.x.custom_style_config_groups = { "small_legend": { - "legend_cfg": {"ncols": 2, "fontsize": 
16, "columnspacing": 0.6}, - "annotate_cfg": {"fontsize": 18, "style": "italic"}, + "legend_cfg": { + "ncols": 2, "fontsize": 16, "columnspacing": 0.6, "labelspacing": 0.275, + "update_handles_labels": update_legend_labels, + }, + "annotate_cfg": {"fontsize": 18}, "cms_label": "wip", }, } @@ -1307,14 +1323,18 @@ def get_dataset_lfns( path = f"store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0" # create the lfn base directory, local or remote - dir_cls = law.LocalDirectoryTarget - fs = f"local_fs_{cfg.campaign.x.custom['name']}" - if not law.config.has_section(fs): - dir_cls = law.wlcg.WLCGDirectoryTarget - fs = f"wlcg_fs_{cfg.campaign.x.custom['name']}" + dir_cls = law.wlcg.WLCGDirectoryTarget + fs = f"wlcg_fs_{cfg.campaign.x.custom['name']}" + local_fs = f"local_fs_{cfg.campaign.x.custom['name']}" + if law.config.has_section(local_fs): + base = law.target.file.remove_scheme(law.config.get_expanded(local_fs, "base")) + if os.path.exists(base): + dir_cls = law.LocalDirectoryTarget + fs = local_fs lfn_base = dir_cls(path, fs=fs) # loop though files and interpret paths as lfns + print(lfn_base) return sorted( "/" + lfn_base.child(basename, type="f").path.lstrip("/") for basename in lfn_base.listdir(pattern="*.root") diff --git a/hbt/config/styles.py b/hbt/config/styles.py index bc04cd7f..c1edfa7a 100644 --- a/hbt/config/styles.py +++ b/hbt/config/styles.py @@ -16,6 +16,7 @@ def stylize_processes(config: od.Config) -> None: cfg = config # recommended cms colors + # see https://cms-analysis.docs.cern.ch/guidelines/plotting/colors cfg.x.colors = DotDict( bright_blue="#3f90da", dark_blue="#011c87", @@ -32,7 +33,7 @@ def stylize_processes(config: od.Config) -> None: for kl in ["0", "1", "2p45", "5"]: if (p := config.get_process(f"hh_ggf_hbb_htt_kl{kl}_kt1", default=None)): - p.color1 = cfg.x.colors.bright_blue + p.color1 = cfg.x.colors.dark_blue p.label = ( r"$HH_{ggf} \rightarrow bb\tau\tau$ __SCALE__" "\n" @@ -40,7 +41,7 @@ def stylize_processes(config: od.Config) -> None: ) if (p := config.get_process("hh_vbf_hbb_htt_kv1_k2v1_kl1", default=None)): - p.color1 = cfg.x.colors.dark_blue + p.color1 = cfg.x.colors.brown p.label = ( r"$HH_{vbf} \rightarrow bb\tau\tau$ __SCALE__" "\n" @@ -48,40 +49,43 @@ def stylize_processes(config: od.Config) -> None: ) if (p := config.get_process("h", default=None)): - p.color1 = cfg.x.colors.purple + p.color1 = cfg.x.colors.teal if (p := config.get_process("tt", default=None)): p.color1 = cfg.x.colors.bright_orange p.label = r"$t\bar{t}$" if (p := config.get_process("st", default=None)): - p.color1 = cfg.x.colors.aubergine + p.color1 = cfg.x.colors.purple if (p := config.get_process("dy", default=None)): - p.color1 = cfg.x.colors.dark_orange + p.color1 = cfg.x.colors.bright_blue if (p := config.get_process("vv", default=None)): - p.color1 = cfg.x.colors.yellow + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("vvv", default=None)): - p.color1 = cfg.x.colors.yellow + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("multiboson", default=None)): - p.color1 = cfg.x.colors.yellow + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("w", default=None)): - p.color1 = cfg.x.colors.teal + p.color1 = cfg.x.colors.aubergine p.label = "W" if (p := config.get_process("z", default=None)): - p.color1 = cfg.x.colors.brown + p.color1 = cfg.x.colors.aubergine p.label = "Z" if (p := config.get_process("v", default=None)): - p.color1 = cfg.x.colors.teal + p.color1 = cfg.x.colors.aubergine + + if (p := 
config.get_process("all_v", default=None)): + p.color1 = cfg.x.colors.aubergine if (p := config.get_process("ewk", default=None)): - p.color1 = cfg.x.colors.brown + p.color1 = cfg.x.colors.dark_orange if (p := config.get_process("ttv", default=None)): p.color1 = cfg.x.colors.grey @@ -96,3 +100,25 @@ def stylize_processes(config: od.Config) -> None: if (p := config.get_process("qcd", default=None)): p.color1 = cfg.x.colors.red + + +def update_legend_labels(ax, handles, labels, n_cols): + """ + Fill empty handles so that all backgrounds are in the first column. + """ + # only implemented for 2 columns so far + if n_cols != 2: + return + # get number of entries per column + n_backgrounds = sum(1 for handle in handles if handle.__class__.__name__ == "StepPatch") + n_other = len(handles) - n_backgrounds + if n_backgrounds == n_other: + return + + # fill left or right column + n_add = abs(n_backgrounds - n_other) + pos = len(handles) if n_backgrounds > n_other else n_backgrounds + empty_handle = ax.plot([], label="", linestyle="None")[0] + for _ in range(n_add): + handles.insert(pos, empty_handle) + labels.insert(pos, "") diff --git a/hbt/selection/jet.py b/hbt/selection/jet.py index 5d91c159..3bde8a30 100644 --- a/hbt/selection/jet.py +++ b/hbt/selection/jet.py @@ -268,7 +268,13 @@ def jet_selection( ) # final event selection (only looking at number of default jets for now) - jet_sel = ak.sum(default_mask, axis=1) >= 2 + # perform a cut on ≥1 jet and all other cuts first, and then cut on ≥2, resulting in an + # additional, _skippable_ step + jet_sel = ( + (ak.sum(default_mask, axis=1) >= 1) + # add additional cuts here in the future + ) + jet_sel2 = jet_sel & (ak.sum(default_mask, axis=1) >= 2) # some final type conversions jet_indices = ak.values_astype(ak.fill_none(jet_indices, 0), np.int32) @@ -284,11 +290,12 @@ def jet_selection( result = SelectionResult( steps={ "jet": jet_sel, + "jet2": jet_sel2, # the btag weight normalization requires a selection with everything but the bjet # selection, so add this step here - # note: there is currently no b-tag discriminant cut at this point, so take jet_sel - "bjet_deepjet": jet_sel, - "bjet_pnet": jet_sel, # no need in run 2 + # note: there is currently no b-tag discriminant cut at this point, so skip it + # "bjet_deepjet": jet_sel, + # "bjet_pnet": jet_sel, # no need in run 2 }, objects={ "Jet": { diff --git a/modules/columnflow b/modules/columnflow index 1b01b794..de74536b 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 1b01b7948c960474e1fff52c660d03960e6da83a +Subproject commit de74536b1038ce9ae445d108324a709848bbcf23 From e7f46e5773600556ed2caa5b4e7ee032728e3cfd Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 15 Jan 2025 11:15:22 +0100 Subject: [PATCH 20/20] Plot updates. 
---
 hbt/calibration/default.py | 63 +++++++++++++++++++++++---------------
 hbt/config/categories.py | 4 +--
 hbt/config/configs_hbt.py | 18 +++++++----
 hbt/config/styles.py | 39 +++++++++++------------
 hbt/config/variables.py | 4 ++-
 hbt/production/btag.py | 43 ++++++++++----------------
 modules/cmsdb | 2 +-
 modules/columnflow | 2 +-
 8 files changed, 95 insertions(+), 80 deletions(-)

diff --git a/hbt/calibration/default.py b/hbt/calibration/default.py
index c5c9c0bc..d4eb8ab7 100644
--- a/hbt/calibration/default.py
+++ b/hbt/calibration/default.py
@@ -71,30 +71,45 @@ def default_init(self: Calibrator) -> None:
     met_name = self.config_inst.x.met_name
     raw_met_name = self.config_inst.x.raw_met_name
 
-    # derive calibrators to add settings
-    self.jec_full_cls = jec.derive("jec_full", cls_dict={
-        "mc_only": True,
-        "nominal_only": True,
-        "met_name": met_name,
-        "raw_met_name": raw_met_name,
-    })
-    self.jec_nominal_cls = jec_nominal.derive("jec_nominal", cls_dict={
-        "met_name": met_name,
-        "raw_met_name": raw_met_name,
-    })
-
-    # version of jer that uses the first random number from deterministic_seeds
-    self.deterministic_jer_cls = jer.derive("deterministic_jer", cls_dict={
-        "deterministic_seed_index": 0,
-        "met_name": met_name,
-    })
-
-    # derive tec calibrators
-    self.tec_cls = tec.derive("tec", cls_dict={"met_name": met_name})
-    self.tec_nominal_cls = tec_nominal.derive("tec_nominal", cls_dict={"met_name": met_name})
-
-    # derive met_phi calibrator (currently only for run 2)
-    self.met_phi_cls = met_phi.derive("met_phi", cls_dict={"met_name": met_name})
+    # derive calibrators to add settings once
+    flag = f"custom_calibs_registered_{self.cls_name}"
+    if not self.config_inst.x(flag, False):
+        # jec calibrators
+        self.config_inst.x.calib_jec_full_cls = jec.derive("jec_full", cls_dict={
+            "mc_only": True,
+            "nominal_only": True,
+            "met_name": met_name,
+            "raw_met_name": raw_met_name,
+        })
+        self.config_inst.x.calib_jec_nominal_cls = jec_nominal.derive("jec_nominal", cls_dict={
+            "met_name": met_name,
+            "raw_met_name": raw_met_name,
+        })
+        # version of jer that uses the first random number from deterministic_seeds
+        self.config_inst.x.calib_deterministic_jer_cls = jer.derive("deterministic_jer", cls_dict={
+            "deterministic_seed_index": 0,
+            "met_name": met_name,
+        })
+        # derive tec calibrators
+        self.config_inst.x.calib_tec_cls = tec.derive("tec", cls_dict={
+            "met_name": met_name,
+        })
+        self.config_inst.x.calib_tec_nominal_cls = tec_nominal.derive("tec_nominal", cls_dict={
+            "met_name": met_name,
+        })
+        # derive met_phi calibrator (currently only used in run 2)
+        self.config_inst.x.calib_met_phi_cls = met_phi.derive("met_phi", cls_dict={
+            "met_name": met_name,
+        })
+        # change the flag
+        self.config_inst.set_aux(flag, True)
+
+    self.jec_full_cls = self.config_inst.x.calib_jec_full_cls
+    self.jec_nominal_cls = self.config_inst.x.calib_jec_nominal_cls
+    self.deterministic_jer_cls = self.config_inst.x.calib_deterministic_jer_cls
+    self.tec_cls = self.config_inst.x.calib_tec_cls
+    self.tec_nominal_cls = self.config_inst.x.calib_tec_nominal_cls
+    self.met_phi_cls = self.config_inst.x.calib_met_phi_cls
 
     # collect derived calibrators and add them to the calibrator uses and produces
     derived_calibrators = {
diff --git a/hbt/config/categories.py b/hbt/config/categories.py
index 0d8aa4ad..efd819da 100644
--- a/hbt/config/categories.py
+++ b/hbt/config/categories.py
@@ -58,11 +58,11 @@ def kwargs_fn(categories, add_qcd_group=True):
         # auxiliary information
         "aux": aux,
         # label
-        "label": [
+        "label": ", ".join([
             cat.label or cat.name
             for cat in categories.values()
             if cat.name != "os"  # os is the default
-        ] or None,
+        ]) or None,
     }
 
     # main analysis categories
diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py
index 4132e8ab..37477e49 100644
--- a/hbt/config/configs_hbt.py
+++ b/hbt/config/configs_hbt.py
@@ -179,7 +179,7 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
         cfg.add_process(proc)
 
     # configure colors, labels, etc
-    from hbt.config.styles import stylize_processes, update_legend_labels
+    from hbt.config.styles import stylize_processes
     stylize_processes(cfg)
 
     ################################################################################################
@@ -441,7 +441,8 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
         "qcd",
         "st",
         "tt_multiboson",
-        "all_v",
+        "v",
+        "multiboson",
         "h",
         "ewk",
     ]),
@@ -531,14 +532,19 @@ def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]
     cfg.x.default_selector_steps = "default"
 
     # plotting overwrites
+    from hbt.config.styles import legend_entries_per_column
+    cfg.x.default_general_settings = {
+        "cms_label": "wip",
+        "whitespace_fraction": 0.31,
+    }
     cfg.x.custom_style_config_groups = {
         "small_legend": {
             "legend_cfg": {
-                "ncols": 2, "fontsize": 16, "columnspacing": 0.6, "labelspacing": 0.275,
-                "update_handles_labels": update_legend_labels,
+                "ncols": 3, "borderpad": 0.7, "loc": "upper left", "fontsize": 16,
+                "columnspacing": 1.6, "labelspacing": 0.28,
+                "entries_per_column": legend_entries_per_column,
             },
-            "annotate_cfg": {"fontsize": 18},
-            "cms_label": "wip",
+            "annotate_cfg": {"fontsize": 16, "xycoords": "axes fraction", "xy": (0.035, 0.73), "style": "italic"},
         },
     }
     cfg.x.default_custom_style_config = "small_legend"
diff --git a/hbt/config/styles.py b/hbt/config/styles.py
index c1edfa7a..6b79221a 100644
--- a/hbt/config/styles.py
+++ b/hbt/config/styles.py
@@ -62,13 +62,13 @@ def stylize_processes(config: od.Config) -> None:
         p.color1 = cfg.x.colors.bright_blue
 
     if (p := config.get_process("vv", default=None)):
-        p.color1 = cfg.x.colors.aubergine
+        p.color1 = cfg.x.colors.yellow
 
     if (p := config.get_process("vvv", default=None)):
-        p.color1 = cfg.x.colors.aubergine
+        p.color1 = cfg.x.colors.yellow
 
     if (p := config.get_process("multiboson", default=None)):
-        p.color1 = cfg.x.colors.aubergine
+        p.color1 = cfg.x.colors.yellow
 
     if (p := config.get_process("w", default=None)):
         p.color1 = cfg.x.colors.aubergine
@@ -102,23 +102,24 @@ def stylize_processes(config: od.Config) -> None:
         p.color1 = cfg.x.colors.red
 
 
-def update_legend_labels(ax, handles, labels, n_cols):
+def legend_entries_per_column(ax, handles: list, labels: list, n_cols: int) -> list[int]:
     """
-    Fill empty handles so that all backgrounds are in the first column.
+    Control number of entries such that backgrounds are in the first n - 1 columns, and everything
+    else in the last one.
""" - # only implemented for 2 columns so far - if n_cols != 2: - return - # get number of entries per column + # get number of background and remaining entries n_backgrounds = sum(1 for handle in handles if handle.__class__.__name__ == "StepPatch") n_other = len(handles) - n_backgrounds - if n_backgrounds == n_other: - return - - # fill left or right column - n_add = abs(n_backgrounds - n_other) - pos = len(handles) if n_backgrounds > n_other else n_backgrounds - empty_handle = ax.plot([], label="", linestyle="None")[0] - for _ in range(n_add): - handles.insert(pos, empty_handle) - labels.insert(pos, "") + + # fill number of entries per column + entries_per_col = n_cols * [0] + n_bkg_cols = n_cols + # set last column if non-backgrounds are present + if n_other: + entries_per_col[-1] = n_other + n_bkg_cols -= 1 + # fill background columns + for i in range(n_bkg_cols): + entries_per_col[i] = n_backgrounds // n_bkg_cols + (n_backgrounds % n_bkg_cols > i) + + return entries_per_col diff --git a/hbt/config/variables.py b/hbt/config/variables.py index 681dced4..2f746224 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -298,7 +298,9 @@ def dilep_mass_test(events): config, name="dilep_mass", expression=dilep_mass_test, - aux={"inputs": ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"]}, + aux={ + "inputs": ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"], + }, binning=(40, 40, 120), unit="GeV", x_title=r"$m_{ll}$", diff --git a/hbt/production/btag.py b/hbt/production/btag.py index 168ddaa6..14edd9b8 100644 --- a/hbt/production/btag.py +++ b/hbt/production/btag.py @@ -47,29 +47,23 @@ def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.A if not weight_name.startswith(self.weight_name): continue - # BUG in prod3: some stats fields were missing so skip them for now - # # create a weight vectors starting with ones for both weight variations, i.e., - # # nomalization per pid and normalization per pid and jet multiplicity - # norm_weight_per_pid = np.ones(len(events), dtype=np.float32) - # norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32) - - # # fill weights with a new mask per unique process id (mostly just one) - # for pid in self.unique_process_ids: - # pid_mask = events.process_id == pid - # # single value - # norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] - # # lookup table - # n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1)) - # norm_weight_per_pid_njet[pid_mask] = self.ratio_per_pid_njet[weight_name][pid][n_jets] - - # # multiply with actual weight - # norm_weight_per_pid = norm_weight_per_pid * events[weight_name] - # norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name] - - # fake values - from columnflow.columnar_util import full_like - norm_weight_per_pid = full_like(events.event, 1.0, dtype=np.float32) - norm_weight_per_pid_njet = norm_weight_per_pid + # create a weight vectors starting with ones for both weight variations, i.e., + # nomalization per pid and normalization per pid and jet multiplicity + norm_weight_per_pid = np.ones(len(events), dtype=np.float32) + norm_weight_per_pid_njet = np.ones(len(events), dtype=np.float32) + + # fill weights with a new mask per unique process id (mostly just one) + for pid in self.unique_process_ids: + pid_mask = events.process_id == pid + # single value + norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] + # lookup table + n_jets = ak.to_numpy(ak.num(events[pid_mask].Jet.pt, axis=1)) + norm_weight_per_pid_njet[pid_mask] = 
self.ratio_per_pid_njet[weight_name][pid][n_jets] + + # multiply with actual weight + norm_weight_per_pid = norm_weight_per_pid * events[weight_name] + norm_weight_per_pid_njet = norm_weight_per_pid_njet * events[weight_name] # store them events = set_ak_column_f32(events, f"normalized_{weight_name}", norm_weight_per_pid) @@ -107,9 +101,6 @@ def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None: @_normalized_btag_weights.setup def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: - # BUG in prod3: some stats fields were missing so skip them for now - return - # load the selection stats selection_stats = self.task.cached_value( key="selection_stats", diff --git a/modules/cmsdb b/modules/cmsdb index fcdb5d9c..b443d41d 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit fcdb5d9ca251b178a29ebdecfb1eb66802e9b1ec +Subproject commit b443d41df03e43c6a918f34c0600e617ea2d79d4 diff --git a/modules/columnflow b/modules/columnflow index de74536b..47bfb3d6 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit de74536b1038ce9ae445d108324a709848bbcf23 +Subproject commit 47bfb3d66a7114ec4c4b2130aee964cffa62096c
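
--
Editor's appendix: the short Python sketches below illustrate the logic touched by the
patches above. They are standalone illustrations with made-up inputs and simplified
stand-ins where noted, not part of any commit.

The lfn-base change in get_dataset_lfns prefers a local file system only when its config
section exists and its base path is actually present on disk, and falls back to the
remote (WLCG) target otherwise. A minimal sketch of that decision, with a plain dict
standing in for the law config and a hypothetical campaign name:

import os

def resolve_lfn_fs(config: dict, campaign: str) -> str:
    # default to the remote file system
    fs = f"wlcg_fs_{campaign}"
    # prefer the local file system if it is configured and its base directory exists
    local_fs = f"local_fs_{campaign}"
    if local_fs in config and os.path.exists(config[local_fs]["base"]):
        fs = local_fs
    return fs

# no local section configured -> the remote fs is chosen
assert resolve_lfn_fs({}, "2022preEE") == "wlcg_fs_2022preEE"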
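
The legend_entries_per_column helper that replaces update_legend_labels no longer pads
the handle list; it returns how many entries each legend column receives, so that
backgrounds fill the first n - 1 columns and everything else lands in the last one. The
arithmetic can be checked with plain integers, without matplotlib; the counts below are
invented:

def entries_per_column(n_backgrounds: int, n_other: int, n_cols: int) -> list[int]:
    entries = n_cols * [0]
    n_bkg_cols = n_cols
    # reserve the last column if non-backgrounds are present
    if n_other:
        entries[-1] = n_other
        n_bkg_cols -= 1
    # distribute backgrounds as evenly as possible, leftmost columns take the remainder
    for i in range(n_bkg_cols):
        entries[i] = n_backgrounds // n_bkg_cols + (n_backgrounds % n_bkg_cols > i)
    return entries

# 7 backgrounds plus 3 other entries in 3 columns -> backgrounds split 4/3
assert entries_per_column(7, 3, 3) == [4, 3, 3]
# without other entries, backgrounds spread over all columns
assert entries_per_column(7, 0, 3) == [3, 2, 2]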
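
In default_init, calibrator classes are now derived once per config and cached as
auxiliary data behind a flag, since the init hook may run many times while class
derivation should only happen once. A reduced sketch of the pattern; Config and derive
are simplified stand-ins for the order/columnflow objects, and the attribute names are
illustrative:

class Config:
    def __init__(self):
        self._aux = {}
    def x(self, key, default=None):
        return self._aux.get(key, default)
    def set_aux(self, key, value):
        self._aux[key] = value

def derive(name: str, cls_dict: dict) -> type:
    # stand-in for columnflow's derive: create a configured subclass
    return type(name, (object,), dict(cls_dict))

def init(config: Config) -> type:
    flag = "custom_calibs_registered"
    if not config.x(flag, False):
        config.set_aux("calib_tec_cls", derive("tec", {"met_name": "PuppiMET"}))
        config.set_aux(flag, True)
    # later calls reuse the cached class instead of deriving a new one
    return config.x("calib_tec_cls")

cfg = Config()
assert init(cfg) is init(cfg)  # repeated inits yield the same derived class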
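
The block re-enabled in _normalized_btag_weights rescales the btag weight per process id
by a precomputed ratio (and, in the second variant, by a ratio additionally binned in jet
multiplicity). A numpy sketch of the per-process part; process ids, weights, and ratios
are all invented:

import numpy as np

process_id = np.array([1, 1, 2, 2, 2])
btag_weight = np.array([0.9, 1.1, 0.8, 1.2, 1.0])
ratio_per_pid = {1: 1.05, 2: 0.95}  # hypothetical normalization factors

# start from ones, then fill one scalar ratio per process id
norm_weight = np.ones(len(btag_weight), dtype=np.float32)
for pid, ratio in ratio_per_pid.items():
    norm_weight[process_id == pid] = ratio

# multiply with the actual weight
norm_weight = norm_weight * btag_weight
print(norm_weight)  # roughly [0.945 1.155 0.76 1.14 0.95]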