From abb1dfcec99a30ebbf7fb89bd4681294cf7d0a8f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 1 Dec 2023 13:36:59 +0100 Subject: [PATCH] add json_filter and use cf met_filter function --- hbw/config/config_run2.py | 13 +++++++ hbw/config/datasets.py | 3 ++ hbw/config/styling.py | 4 +-- hbw/selection/common.py | 75 ++++++++++++++++----------------------- hbw/selection/sl.py | 3 +- 5 files changed, 49 insertions(+), 49 deletions(-) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index c962803e..0b7b048f 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -384,6 +384,19 @@ def make_jme_filename(jme_aux, sample_type, name, era=None): if year != 2017: raise NotImplementedError("TODO: generalize external files to different years than 2017") + cfg.x.met_filters = { + "Flag.goodVertices", + "Flag.globalSuperTightHalo2016Filter", + "Flag.HBHENoiseFilter", + "Flag.HBHENoiseIsoFilter", + "Flag.EcalDeadCellTriggerPrimitiveFilter", + "Flag.BadPFMuonFilter", + "Flag.BadPFMuonDzFilter", # this filter does not work with our EOY Signal samples + "Flag.eeBadScFilter", + } + if cfg.has_tag("is_run3"): + cfg.x.met_filters.add("Flag.ecalBadCalibFilter") # extend the set defined above, using the same "Flag." prefix + cfg.x.external_files.update(DotDict.wrap({ # files from TODO "lumi": { diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index a8cd2266..22085889 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -182,6 +182,9 @@ def configure_hbw_datasets(config: od.Config, limit_dataset_files: int | None = dataset.x.skip_pdf = True dataset.add_tag("skip_scale") dataset.add_tag("skip_pdf") + else: + # our default Run2 signal samples are EOY, so we have to skip golden json, certain met filter + dataset.add_tag("is_eoy") def get_custom_hh_datasets( diff --git a/hbw/config/styling.py b/hbw/config/styling.py index 851981fe..fc2cf1a6 100644 --- a/hbw/config/styling.py +++ b/hbw/config/styling.py @@ -113,8 +113,8 @@ def stylize_processes(config: od.Config) -> None: "puId": (8, -0.5, 
7.5), "puIdDisc": (40, -2, 1), "chHEF": (40, 0, 1), - "bRegRes": (40, -10, 10), - "bRegCorr": (40, -10, 10), + "bRegRes": (80, -1, 1), + "bRegCorr": (80, 0, 2), # FatJet "msoftdrop": (40, 0, 400), "deepTagMD_HbbvsQCD": (40, 0, 1), diff --git a/hbw/selection/common.py b/hbw/selection/common.py index 99b174a8..3d946146 100644 --- a/hbw/selection/common.py +++ b/hbw/selection/common.py @@ -16,6 +16,8 @@ from columnflow.production.util import attach_coffea_behavior from columnflow.selection import Selector, SelectionResult, selector +from columnflow.selection.cms.met_filters import met_filters +from columnflow.selection.cms.json_filter import json_filter from columnflow.production.cms.mc_weight import mc_weight from columnflow.production.categories import category_ids from columnflow.production.processes import process_ids @@ -234,52 +236,21 @@ def sl_boosted_jet_selection( ) -@selector( - exposed=False, -) -def noise_filter( - self: Selector, - events: ak.Array, - results: SelectionResult, - **kwargs, -) -> Tuple[ak.Array, SelectionResult]: - mask = ak.Array(np.ones(len(events), dtype=bool)) - for flag in self.noise_filter: - mask = mask & events.Flag[flag] +def get_met_filters(self: Selector): + """ custom function to skip met filter for our Run2 EOY signal samples """ + met_filters = set(self.config_inst.x.met_filters) # copy, so the shared config set is never mutated - results.steps["noise_filter"] = mask - return events, results - - -@noise_filter.init -def noise_filter_init(self: Selector): - if not getattr(self, "dataset_inst", None): - return + if getattr(self, "dataset_inst", None) and self.dataset_inst.has_tag("is_eoy"): + # remove filter for EOY sample + try: + met_filters.remove("Flag.BadPFMuonDzFilter") + except KeyError: + pass - # TODO: make campaign dependent - self.noise_filter = { - "goodVertices", - "globalSuperTightHalo2016Filter", - "HBHENoiseFilter", - "HBHENoiseIsoFilter", - "EcalDeadCellTriggerPrimitiveFilter", - "BadPFMuonFilter", - "BadPFMuonDzFilter", - # "hfNoisyHitsFilter", # 
optional for UL - "eeBadScFilter", # might be data only - # "ecalBadCalibReducedMINIAODFilter", # 2017 and 2018 only, only in MiniAOD - } + return met_filters - if self.dataset_inst.has_tag("is_hbw") and self.config_inst.has_tag("is_run2"): - # missing in MiniAOD HH samples - self.noise_filter.remove("BadPFMuonDzFilter") - if self.config_inst.has_tag("is_run3"): - self.noise_filter.add("ecalBadCalibFilter") - # if self.dataset_inst.is_data: - # self.noise_filter.add("eeBadScFilter") - - self.uses = {f"Flag.{flag}" for flag in self.noise_filter} +hbw_met_filters = met_filters.derive("hbw_met_filters", cls_dict=dict(get_met_filters=get_met_filters)) @selector( @@ -299,11 +270,12 @@ def primary_vertex( @selector( uses={ - noise_filter, primary_vertex, + hbw_met_filters, json_filter, "PV.npvsGood", process_ids, attach_coffea_behavior, mc_weight, large_weights_killer, }, produces={ + hbw_met_filters, json_filter, process_ids, attach_coffea_behavior, mc_weight, large_weights_killer, }, @@ -336,8 +308,19 @@ def pre_selection( results = SelectionResult() # apply some general quality criteria on events - events, results = self[noise_filter](events, results, **kwargs) - events, results = self[primary_vertex](events, results, **kwargs) + results.steps["good_vertex"] = events.PV.npvsGood >= 1 + events, met_results = self[hbw_met_filters](events, **kwargs) # produces "met_filter" step + results += met_results + if self.dataset_inst.is_data: + events, json_results = self[json_filter](events, **kwargs) # produces "json" step + results += json_results + else: + results.steps["json"] = ak.Array(np.ones(len(events), dtype=bool)) + + # combine quality criteria into a single step + results.steps["cleanup"] = ( + results.steps.good_vertex & results.steps.met_filter & results.steps.json + ) return events, results @@ -389,6 +372,8 @@ def log_fraction(stats_key: str, msg: str | None = None): # TODO: remove as soon as possible as it might lead to weird bugs when there are none entries in 
inputs events = ak.fill_none(events, EMPTY_FLOAT) + logger.info(f"Selected {ak.sum(results.event)} from {len(events)} events") + return events, results diff --git a/hbw/selection/sl.py b/hbw/selection/sl.py index 7d18a59f..2a1bb9f4 100644 --- a/hbw/selection/sl.py +++ b/hbw/selection/sl.py @@ -276,8 +276,7 @@ def sl( # combined event selection after all steps except b-jet selection results.steps["all_but_bjet"] = ( - results.steps.noise_filter & - results.steps.good_vertex & + results.steps.cleanup & (results.steps.Jet | results.steps.HbbJet_no_bjet) & results.steps.Lepton & results.steps.VetoLepton &