@call_once_on_config()
def add_gen_categories(config: od.Config) -> None:
    """
    Adds categories based on the generator-level lepton content to a *config*.

    A parent category "gen_1lep" (id 100000) is registered on the config and receives one
    child category per lepton flavour (electron, muon, tau), each with its own categorizer.
    """
    # parent category; its selection is only a placeholder
    parent = config.add_category(
        name="gen_1lep",
        id=100000,
        selection="catid_selection_incl",  # this should not be called!
        label="1 gen lepton",
    )

    # (name, id, selection, label) of the child categories, in registration order
    flavour_categories = (
        ("gen_1e", 200000, "catid_gen_1e", "1 gen electron"),
        ("gen_1mu", 300000, "catid_gen_1mu", "1 gen muon"),
        ("gen_1tau", 400000, "catid_gen_1tau", "1 gen tau"),
    )
    for cat_name, cat_id, cat_selection, cat_label in flavour_categories:
        parent.add_category(
            name=cat_name,
            id=cat_id,
            selection=cat_selection,
            label=cat_label,
        )
@categorizer(
    uses=optional_column("HardGenPart.pdgId", "GenPart.pdgId"),
    n_particles={},  # dict mapping pdgId -> exact number of required hard particles with this pdgId
    consider_charge=False,  # when False, particles and antiparticles are counted together
    call_force=True,
)
def catid_n_gen_particles(
    self: Categorizer, events: ak.Array, results: SelectionResult | None = None, **kwargs,
) -> tuple[ak.Array, ak.Array]:
    """
    Categorizer to select events with a certain number of hard gen particles.

    For each ``pdgId: n`` entry in ``self.n_particles`` an event is kept only if it contains
    exactly ``n`` hard gen particles with that pdgId. When ``self.consider_charge`` is False,
    the comparison is done on absolute pdgIds. With an empty ``n_particles`` dict, all events
    pass. For data, all events pass as well.

    The pdgIds are taken from the "HardGenPart.pdgId" column when present. Otherwise,
    "GenPart.pdgId" is indexed with the "HardGenPart" entry stored under the "GenPart" source
    in the objects of *results*.

    :param events: Array of events to categorize.
    :param results: Optional selection result providing the hard gen particle mask.
    :return: Tuple of the unchanged *events* and the per-event boolean category mask.
    :raises ValueError: If neither the "HardGenPart.pdgId" column nor *results* is available.
    """
    # start with true mask
    mask = np.ones(len(events), dtype=bool)
    if self.dataset_inst.is_data:
        # for data, always return true mask
        return events, mask

    if has_ak_column(events, "HardGenPart.pdgId"):
        gp_id = events.HardGenPart.pdgId
    elif results is not None:
        # get the hard gen particles via the object mask stored in the SelectionResult
        gp_id = events.GenPart.pdgId[results.objects.GenPart.HardGenPart]
    else:
        raise ValueError(
            f"{self.cls_name} requires either the 'HardGenPart.pdgId' column or a SelectionResult "
            "containing the 'HardGenPart' object mask",
        )

    if not self.consider_charge:
        # count particles and antiparticles together
        gp_id = abs(gp_id)

    for pdg_id, num_particles in self.n_particles.items():
        target_id = pdg_id if self.consider_charge else abs(pdg_id)
        # require exactly the requested multiplicity of this particle type per event
        mask = mask & (ak.sum(gp_id == target_id, axis=1) == num_particles)

    return events, mask
{11: 0, 13: 2, 15: 0}}) +# catid_gen_2tau = catid_n_gen_particles.derive("catid_gen_2tau", cls_dict={"n_particles": {11: 0, 13: 0, 15: 2}}) +# catid_gen_emu = catid_n_gen_particles.derive("catid_gen_emu", cls_dict={"n_particles": {11: 1, 13: 1, 15: 0}}) +# catid_gen_etau = catid_n_gen_particles.derive("catid_gen_etau", cls_dict={"n_particles": {11: 1, 13: 0, 15: 1}}) +# catid_gen_mutau = catid_n_gen_particles.derive("catid_gen_mutau", cls_dict={"n_particles": {11: 0, 13: 1, 15: 1}}) + + # # Categorizer called as part of cf.SelectEvents # diff --git a/hbw/selection/common.py b/hbw/selection/common.py index 24d3bf1e..634c3b58 100644 --- a/hbw/selection/common.py +++ b/hbw/selection/common.py @@ -22,6 +22,7 @@ from columnflow.production.categories import category_ids from columnflow.production.processes import process_ids +from hbw.selection.gen import hard_gen_particles from hbw.production.weights import event_weights_to_normalize, large_weights_killer from hbw.selection.stats import hbw_increment_stats from hbw.selection.cutflow_features import cutflow_features @@ -328,6 +329,9 @@ def post_selection( ) -> Tuple[ak.Array, SelectionResult]: """ Methods that are called for both SL and DL after calling the selection modules """ + if self.dataset_inst.is_mc: + events = self[hard_gen_particles](events, **kwargs) + # build categories events = self[category_ids](events, results=results, **kwargs) @@ -371,5 +375,5 @@ def post_selection_init(self: Selector) -> None: if not getattr(self, "dataset_inst", None) or self.dataset_inst.is_data: return - self.uses.add(event_weights_to_normalize) - self.produces.add(event_weights_to_normalize) + self.uses.update({event_weights_to_normalize, hard_gen_particles}) + self.produces.update({event_weights_to_normalize, hard_gen_particles}) diff --git a/hbw/selection/gen.py b/hbw/selection/gen.py new file mode 100644 index 00000000..891c8ca4 --- /dev/null +++ b/hbw/selection/gen.py @@ -0,0 +1,54 @@ +# coding: utf-8 + +""" +Selectors related 
@selector(
    uses={"GenPart.statusFlags"},
    mc_only=True,
)
def hard_gen_particles(
    self: Selector,
    events: ak.Array,
    results: "SelectionResult | None" = None,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    """
    Selector that identifies the gen particles belonging to the hard process.

    The mask is built from the "isHardProcess" status flag of the "GenPart" collection and
    stored in the returned SelectionResult as the object "HardGenPart" of source "GenPart".

    :param events: Array of events containing the "GenPart" collection.
    :param results: Unused; accepted (and optional) so the selector can be called with or
        without previous selection results.
    :return: Tuple of the unchanged *events* and a SelectionResult holding the object mask.
        Callers need to unpack both values.
    """
    # per-particle boolean mask of gen particles flagged as part of the hard process
    gp_mask = events.GenPart.hasFlags("isHardProcess")

    return events, SelectionResult(
        objects={"GenPart": {"HardGenPart": gp_mask}},
    )