Skip to content

Commit

Permalink
implement categorization based on gen particles
Browse files Browse the repository at this point in the history
  • Loading branch information
mafrahm committed Jan 8, 2024
1 parent 7345eaa commit 610685b
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 2 deletions.
46 changes: 46 additions & 0 deletions hbw/config/categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@

"""
Definition of categories.
Categorizer modules (used to determine category masks) are defined in hbw.selection.categories
Ids for combinations of categories are built as the sum of category ids.
To avoid reusing category ids, each category block (e.g. leptons, jets, ...) uses ids of a different
power of 10.
power of 10 | category block
1: free (only used for inclusive category)
2: jet (resolved vs boosted)
3: bjet (1 vs geq 2)
4: lepton
5: dnn
6: gen leptons
"""

from collections import OrderedDict
Expand All @@ -17,12 +32,43 @@
logger = law.logger.get_logger(__name__)


@call_once_on_config()
def add_gen_categories(config: od.Config) -> None:
    """
    Adds the generator-level lepton categories to a *config*.

    A parent category "gen_1lep" is registered on the config and the leaf categories
    "gen_1e", "gen_1mu" and "gen_1tau" are attached to it as child categories.
    """
    # parent category; its selection is only a placeholder and should not be called!
    parent_cat = config.add_category(
        name="gen_1lep",
        id=100000,
        selection="catid_selection_incl",
        label="1 gen lepton",
    )

    # (name, id, selection, label) of each leaf category, in registration order
    leaf_categories = (
        ("gen_1e", 200000, "catid_gen_1e", "1 gen electron"),
        ("gen_1mu", 300000, "catid_gen_1mu", "1 gen muon"),
        ("gen_1tau", 400000, "catid_gen_1tau", "1 gen tau"),
    )
    for cat_name, cat_id, cat_selection, cat_label in leaf_categories:
        parent_cat.add_category(
            name=cat_name,
            id=cat_id,
            selection=cat_selection,
            label=cat_label,
        )


@call_once_on_config()
def add_categories_selection(config: od.Config) -> None:
"""
Adds categories to a *config*, that are typically produced in `SelectEvents`.
"""

# adds categories based on the existence of gen particles
# add_gen_categories(config)

config.x.lepton_channels = {
"sl": ("1e", "1mu"),
"dl": ("2e", "2mu", "emu"),
Expand Down
47 changes: 47 additions & 0 deletions hbw/selection/categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
Selection methods defining categories based on selection step results.
"""

from __future__ import annotations

from columnflow.util import maybe_import
from columnflow.categorization import Categorizer, categorizer
from columnflow.selection import SelectionResult
from columnflow.columnar_util import has_ak_column, optional_column

np = maybe_import("numpy")
ak = maybe_import("awkward")
Expand All @@ -17,6 +20,50 @@ def catid_selection_incl(self: Categorizer, events: ak.Array, **kwargs) -> tuple
mask = ak.ones_like(events.event) > 0
return events, mask

#
# Categorizers based on gen info
#


@categorizer(
    uses=optional_column("HardGenPart.pdgId", "GenPart.pdgId"),
    n_particles={},  # dict with pdgId + number of required hard particles from this pdgId
    consider_charge=False,
    call_force=True,
)
def catid_n_gen_particles(
    self: Categorizer, events: ak.Array, results: SelectionResult | None = None, **kwargs,
) -> tuple[ak.Array, ak.Array]:
    """
    Categorizer to select events with a certain number of hard gen particles.

    The requirements are taken from the *n_particles* attribute, a dict mapping a pdgId to the
    exact number of hard gen particles with that pdgId that an event must contain. An event is
    selected only if all requirements are fulfilled. When *consider_charge* is *False*, particles
    and antiparticles are counted together by comparing the absolute value of the pdgId.

    The pdgIds are read from the "HardGenPart.pdgId" column if present. Otherwise, they are built
    from "GenPart.pdgId" using the "HardGenPart" entry of the "GenPart" objects in *results*.

    :param events: Array of events to categorize.
    :param results: Selection results, only needed when "HardGenPart.pdgId" is not in *events*.
    :raises ValueError: If the hard gen particles can be obtained neither from *events* nor
        from *results*.
    :return: Tuple of the unchanged *events* and the per-event category mask.
    """
    # start with true mask
    mask = np.ones(len(events), dtype=bool)
    if self.dataset_inst.is_data:
        # for data, always return true mask
        return events, mask

    if has_ak_column(events, "HardGenPart.pdgId"):
        gp_id = events.HardGenPart.pdgId
    else:
        # try to get gp_id column via SelectionResult
        if results is None:
            raise ValueError(
                f"{self.cls_name} requires either the 'HardGenPart.pdgId' column or a "
                "SelectionResult containing the 'HardGenPart' objects of 'GenPart'",
            )
        gp_id = events.GenPart.pdgId[results.objects.GenPart.HardGenPart]

    if not self.consider_charge:
        # antiparticles carry negative pdgIds, so compare absolute values
        gp_id = abs(gp_id)

    for pdg_id, num_particles in self.n_particles.items():
        # require exactly *num_particles* hard gen particles with this pdgId per event
        mask = mask & (ak.sum(gp_id == pdg_id, axis=1) == num_particles)

    return events, mask


# derived categorizers requiring exactly one hard gen lepton of a single flavour
# (pdgId 11: electron, 13: muon, 15: tau)
catid_gen_1e = catid_n_gen_particles.derive(
    "catid_gen_1e",
    cls_dict={"n_particles": {11: 1, 13: 0, 15: 0}},
)
catid_gen_1mu = catid_n_gen_particles.derive(
    "catid_gen_1mu",
    cls_dict={"n_particles": {11: 0, 13: 1, 15: 0}},
)
catid_gen_1tau = catid_n_gen_particles.derive(
    "catid_gen_1tau",
    cls_dict={"n_particles": {11: 0, 13: 0, 15: 1}},
)
# catid_gen_2e = catid_n_gen_particles.derive("catid_gen_2e", cls_dict={"n_particles": {11: 2, 13: 0, 15: 0}})
# catid_gen_2mu = catid_n_gen_particles.derive("catid_gen_2mu", cls_dict={"n_particles": {11: 0, 13: 2, 15: 0}})
# catid_gen_2tau = catid_n_gen_particles.derive("catid_gen_2tau", cls_dict={"n_particles": {11: 0, 13: 0, 15: 2}})
# catid_gen_emu = catid_n_gen_particles.derive("catid_gen_emu", cls_dict={"n_particles": {11: 1, 13: 1, 15: 0}})
# catid_gen_etau = catid_n_gen_particles.derive("catid_gen_etau", cls_dict={"n_particles": {11: 1, 13: 0, 15: 1}})
# catid_gen_mutau = catid_n_gen_particles.derive("catid_gen_mutau", cls_dict={"n_particles": {11: 0, 13: 1, 15: 1}})


#
# Categorizer called as part of cf.SelectEvents
#
Expand Down
8 changes: 6 additions & 2 deletions hbw/selection/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from columnflow.production.categories import category_ids
from columnflow.production.processes import process_ids

from hbw.selection.gen import hard_gen_particles
from hbw.production.weights import event_weights_to_normalize, large_weights_killer
from hbw.selection.stats import hbw_increment_stats
from hbw.selection.cutflow_features import cutflow_features
Expand Down Expand Up @@ -328,6 +329,9 @@ def post_selection(
) -> Tuple[ak.Array, SelectionResult]:
""" Methods that are called for both SL and DL after calling the selection modules """

if self.dataset_inst.is_mc:
events = self[hard_gen_particles](events, **kwargs)

# build categories
events = self[category_ids](events, results=results, **kwargs)

Expand Down Expand Up @@ -371,5 +375,5 @@ def post_selection_init(self: Selector) -> None:
if not getattr(self, "dataset_inst", None) or self.dataset_inst.is_data:
return

self.uses.add(event_weights_to_normalize)
self.produces.add(event_weights_to_normalize)
self.uses.update({event_weights_to_normalize, hard_gen_particles})
self.produces.update({event_weights_to_normalize, hard_gen_particles})
54 changes: 54 additions & 0 deletions hbw/selection/gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# coding: utf-8

"""
Selectors related to gen-level particles.
"""

import law

from columnflow.selection import Selector, SelectionResult, selector
from columnflow.util import maybe_import

np = maybe_import("numpy")
ak = maybe_import("awkward")

logger = law.logger.get_logger(__name__)


# mapping from (absolute) pdgId to a human-readable particle name
pdgId_map = {
    # quarks
    1: "down",
    2: "up",
    3: "strange",
    4: "charm",
    5: "bottom",
    6: "top",
    # charged leptons and neutrinos
    11: "electron",
    12: "e_neutrino",
    13: "muon",
    14: "mu_neutrino",
    15: "tau",
    16: "tau_neutrino",
    # bosons
    21: "gluon",
    22: "photon",
    23: "Z",
    24: "W",
    25: "Higgs",
}


@selector(
    uses={"GenPart.statusFlags"},
    mc_only=True,
)
def hard_gen_particles(
    self: Selector,
    events: ak.Array,
    results: "SelectionResult | None" = None,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    """
    Selector that marks the gen particles belonging to the hard process.

    The per-particle mask is obtained from the "isHardProcess" status flag of the "GenPart"
    collection and stored in the returned :py:class:`SelectionResult` as the "HardGenPart"
    object of the "GenPart" collection.

    :param events: Array of events containing the "GenPart" collection.
    :param results: Unused; optional so that the selector can be called with or without
        previous selection results.
    :return: Tuple of the unchanged *events* and a :py:class:`SelectionResult` holding the
        hard gen particle mask.
    """
    # per-particle boolean mask of gen particles that are part of the hard process
    gp_mask = events.GenPart.hasFlags("isHardProcess")

    return events, SelectionResult(
        objects={"GenPart": {"HardGenPart": gp_mask}},
    )

0 comments on commit 610685b

Please sign in to comment.