From ab48ba80b1e529eae0249975ace3ba00877a32e6 Mon Sep 17 00:00:00 2001
From: Mathis Frahm
Date: Wed, 29 Nov 2023 13:07:23 +0100
Subject: [PATCH] add noise filters, good PV requirement, and loose puId to SL selection

- tag the config with "is_run2" and keep the Jet "qgl" column
- add the noise_filter and primary_vertex selectors and call them in
  pre_selection
- require the "noise_filter" and "good_vertex" steps in the SL
  "all_but_bjet" step and store results.event as step "all"
- require puId >= 4 for SL jets unless their pt is above 50 GeV
- fill None entries with EMPTY_FLOAT at the end of post_selection
  (temporary workaround, see the TODO in the code)

Review fix: noise_filter_init removed "BadPFMuonDzFilter" from the
attribute "event_flats", which is not defined anywhere in this patch; the
flag is now removed from the "noise_filter" set that is built just above.

---
 hbw/config/config_run2.py |  4 ++-
 hbw/selection/common.py   | 72 +++++++++++++++++++++++++++++++++++++++
 hbw/selection/sl.py       | 15 ++++++--
 3 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py
index 7db1b252..c962803e 100644
--- a/hbw/config/config_run2.py
+++ b/hbw/config/config_run2.py
@@ -49,6 +49,8 @@ def add_config(
     # create a config by passing the campaign, so id and name will be identical
     cfg = analysis.add_config(campaign, name=config_name, id=config_id, tags=analysis.tags)
 
+    cfg.add_tag("is_run2")
+
     if cfg.has_tag("is_sl"):
         cfg.x.lepton_tag = "sl"
     elif cfg.has_tag("is_dl"):
@@ -423,7 +425,7 @@ def make_jme_filename(jme_aux, sample_type, name, era=None):
             "btag_weight*",
         } | four_vec(  # Jets
             {"Jet", "Bjet", "VBFJet"},
-            {"btagDeepFlavB", "hadronFlavour"},
+            {"btagDeepFlavB", "hadronFlavour", "qgl"},
         ) | four_vec(  # FatJets
             {"FatJet", "HbbJet"},
             {
diff --git a/hbw/selection/common.py b/hbw/selection/common.py
index 50821580..80945905 100644
--- a/hbw/selection/common.py
+++ b/hbw/selection/common.py
@@ -234,8 +234,72 @@ def sl_boosted_jet_selection(
     )
 
 
+@selector(
+    exposed=False,
+)
+def noise_filter(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    mask = ak.Array(np.ones(len(events), dtype=bool))
+    for flag in self.noise_filter:
+        mask = mask & events.Flag[flag]
+
+    results.steps["noise_filter"] = mask
+    return events, results
+
+
+@noise_filter.init
+def noise_filter_init(self: Selector):
+    if not getattr(self, "dataset_inst", None):
+        return
+
+    # TODO: make campaign dependent
+    self.noise_filter = {
+        "goodVertices",
+        "globalSuperTightHalo2016Filter",
+        "HBHENoiseFilter",
+        "HBHENoiseIsoFilter",
+        "EcalDeadCellTriggerPrimitiveFilter",
+        "BadPFMuonFilter",
+        "BadPFMuonDzFilter",
+        # "hfNoisyHitsFilter",  # optional for UL
+        "eeBadScFilter",  # might be data only
+        # "ecalBadCalibReducedMINIAODFilter",  # 2017 and 2018 only, only in MiniAOD
+    }
+
+    if self.dataset_inst.has_tag("is_hbw") and self.config_inst.has_tag("is_run2"):
+        # missing in MiniAOD HH samples
+        self.noise_filter.remove("BadPFMuonDzFilter")
+
+    if self.config_inst.has_tag("is_run3"):
+        self.noise_filter.add("ecalBadCalibFilter")
+        # if self.dataset_inst.is_data:
+        #     self.noise_filter.add("eeBadScFilter")
+
+    self.uses = {f"Flag.{flag}" for flag in self.noise_filter}
+
+
+@selector(
+    uses={"PV.npvsGood"},
+    exposed=False,
+)
+def primary_vertex(
+    self: Selector,
+    events: ak.Array,
+    results: SelectionResult,
+    **kwargs,
+) -> Tuple[ak.Array, SelectionResult]:
+    """ requires at least one good primary vertex """
+    results.steps["good_vertex"] = events.PV.npvsGood >= 1
+    return events, results
+
+
 @selector(
     uses={
+        noise_filter, primary_vertex,
         process_ids, attach_coffea_behavior,
         mc_weight, large_weights_killer,
     },
@@ -271,6 +335,10 @@ def pre_selection(
     # prepare the selection results that are updated at every step
     results = SelectionResult()
 
+    # apply some general quality criteria on events
+    events, results = self[noise_filter](events, results, **kwargs)
+    events, results = self[primary_vertex](events, results, **kwargs)
+
     return events, results
 
 
@@ -317,6 +385,10 @@ def log_fraction(stats_key: str, msg: str | None = None):
     if self.config_inst.x("do_cutflow_features", False):
         events = self[cutflow_features](events, results=results, **kwargs)
 
+    # temporary fix for optional types from Calibration (e.g. events.Jet.pt --> ?float32)
+    # TODO: remove as soon as possible as it might lead to weird bugs when there are none entries in inputs
+    events = ak.fill_none(events, EMPTY_FLOAT)
+
     return events, results
 
 
diff --git a/hbw/selection/sl.py b/hbw/selection/sl.py
index 105379b4..7d18a59f 100644
--- a/hbw/selection/sl.py
+++ b/hbw/selection/sl.py
@@ -14,6 +14,7 @@
 from columnflow.production.categories import category_ids
 from columnflow.production.processes import process_ids
 
+from hbw.util import four_vec
 from hbw.selection.common import (
     masked_sorted_indices, sl_boosted_jet_selection, vbf_jet_selection,
     pre_selection, post_selection,
@@ -27,7 +28,9 @@
 
 
 @selector(
-    uses={"Jet.pt", "Jet.eta", "Jet.phi", "Jet.mass", "Jet.btagDeepFlavB", "Jet.jetId"},
+    uses=four_vec("Jet", {
+        "btagDeepFlavB", "jetId", "puId",
+    }),
     produces={"cutflow.n_jet", "cutflow.n_deepjet_med"},
     exposed=True,
 )
@@ -38,19 +41,23 @@ def sl_jet_selection(
     stats: defaultdict,
     **kwargs,
 ) -> Tuple[ak.Array, SelectionResult]:
+    # NanoAOD documentation: https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookNanoAOD#Jets
     # HH -> bbWW(qqlnu) jet selection
     # - require at least 3 jets with pt>30, eta<2.4
     # - require at least 1 jet with pt>30, eta<2.4, b-score>0.3040 (Medium WP)
 
     # assign local index to all Jets
     events = set_ak_column(events, "local_index", ak.local_index(events.Jet))
 
-    # jets
     jet_mask_loose = (events.Jet.pt > 5) & abs(events.Jet.eta < 2.4)
     jet_mask = (
         (events.Jet.pt > 25) & (abs(events.Jet.eta) < 2.4) & (events.Jet.jetId == 6) &
         ak.all(events.Jet.metric_table(lepton_results.x.lepton) > 0.4, axis=2)
     )
+    # apply loose Jet puId to jets with pt below 50 GeV
+    jet_pu_mask = (events.Jet.puId >= 4) | (events.Jet.pt > 50)
+    jet_mask = jet_mask & jet_pu_mask
+
     events = set_ak_column(events, "cutflow.n_jet", ak.sum(jet_mask, axis=1))
     jet_sel = events.cutflow.n_jet >= 3
     jet_indices = masked_sorted_indices(jet_mask, events.Jet.pt)
@@ -269,7 +276,8 @@ def sl(
 
     # combined event selection after all steps except b-jet selection
     results.steps["all_but_bjet"] = (
-        # NOTE: the boosted selection actually includes a b-jet selection...
+        results.steps.noise_filter &
+        results.steps.good_vertex &
         (results.steps.Jet | results.steps.HbbJet_no_bjet) &
         results.steps.Lepton &
         results.steps.VetoLepton &
@@ -285,6 +293,7 @@ def sl(
         results.steps.all_but_bjet &
         ((results.steps.Jet & results.steps.Bjet) | results.steps.HbbJet)
     )
+    results.steps["all"] = results.event
 
     # build categories
     events, results = self[post_selection](events, results, stats, **kwargs)