Commit

added new producer for constituents
haddadanas committed May 23, 2024
1 parent 5dd6a35 commit 4efac7c
Showing 2 changed files with 288 additions and 0 deletions.
187 changes: 187 additions & 0 deletions hbt/production/extra_features.py
@@ -0,0 +1,187 @@
# coding: utf-8

"""
Column production methods related to higher-level features.
"""

import functools

from columnflow.production import Producer, producer
from columnflow.util import maybe_import
from columnflow.columnar_util import set_ak_column

from hbt.production.default import default

np = maybe_import("numpy")
ak = maybe_import("awkward")

# helpers
set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32)
set_ak_column_i8 = functools.partial(set_ak_column, value_type=np.int8)


@producer(
uses={
"Tau.*", "PFCandidate.*", "PFCandidateIndices.tau",
},
produces={
f"PFCandidatesTau.{name}" for name in [
"charge_sum_pfcand", "ncand_per_part",
"pt_vsum_pfcand", "pt_hsum_pfcand", "dpt_vsum", "dpt_hsum",
"decaymode_charge", "abs_charge_sum_pfcand",
]
},
)
def taucand(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
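    """
    Produces PF-candidate based features for each tau: charge sums, candidate
    multiplicity, vectorial and scalar pt sums, and the expected prong count
    derived from the decay mode.
    """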

from coffea.nanoevents.methods import vector

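    # expected number of charged prongs for each (HPS) tau decay mode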
decaymodes = {
0: 1,
1: 1,
2: 1,
5: 2,
6: 2,
7: 2,
10: 3,
11: 3,
}

tau = events.Tau
pfcand_indices = events.PFCandidateIndices.tau
pfcand = events.PFCandidate
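    # wrap the PF candidates into PtEtaPhiM four-vectors so that kinematic sums can be built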
pfcand = ak.zip(
{
"pt": pfcand.pt,
"eta": pfcand.eta,
"phi": pfcand.phi,
"mass": pfcand.mass,
"charge": pfcand.charge,
},
with_name="PtEtaPhiMLorentzVector",
behavior=vector.behavior,
)

    # fixed per-tau index used for matching; the result arrays start from it as well,
    # but the masking below is done against tau_index so that already-computed values
    # cannot collide with a later index
    tau_index = ak.local_index(tau)
    charge_results = ak.local_index(tau)
    ncand_results = ak.local_index(tau)
    pt_vsum_results = ak.local_index(tau)
    pt_hsum_results = ak.local_index(tau)
    decaymode_charge = ak.local_index(tau)
    abs_charge_sum_pfcand = ak.local_index(tau)

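    # loop over all per-event tau indices and aggregate the candidates matched to each index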
    for i in range(ak.max(tau_index) + 1):
decay_prod = pfcand[pfcand_indices == i]
charge_sum = ak.sum(decay_prod.charge, axis=-1)
        abs_charge_sum = ak.sum(abs(decay_prod.charge), axis=-1)
        charge_results = ak.where(tau_index == i, charge_sum, charge_results)
        abs_charge_sum_pfcand = ak.where(tau_index == i, abs_charge_sum, abs_charge_sum_pfcand)
        ncand_results = ak.where(tau_index == i, ak.num(decay_prod), ncand_results)
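        # vectorial sum of the candidate four-momenta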
vec_sum = ak.zip(
{
"x": ak.sum(decay_prod.x, axis=-1),
"y": ak.sum(decay_prod.y, axis=-1),
"z": ak.sum(decay_prod.z, axis=-1),
"t": ak.sum(decay_prod.t, axis=-1),
},
with_name="LorentzVector",
behavior=vector.behavior,
)
hsum = ak.sum(decay_prod.pt, axis=-1)
        pt_vsum_results = ak.where(tau_index == i, vec_sum.pt, pt_vsum_results)
        pt_hsum_results = ak.where(tau_index == i, hsum, pt_hsum_results)

    # absolute difference between the tau pt and the vectorial / scalar candidate pt sums
dpt_vsum_results = abs(tau.pt - pt_vsum_results)
dpt_hsum_results = abs(tau.pt - pt_hsum_results)

    # expected number of charged prongs inferred from the reconstructed decay mode
for mode, charge in decaymodes.items():
decaymode_charge = ak.where(tau.decayMode == mode, charge, decaymode_charge)

events = set_ak_column_i8(events, "PFCandidatesTau.charge_sum_pfcand", charge_results)
events = set_ak_column_i8(events, "PFCandidatesTau.ncand_per_part", ncand_results)
events = set_ak_column_f32(events, "PFCandidatesTau.pt_vsum_pfcand", pt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesTau.pt_hsum_pfcand", pt_hsum_results)
events = set_ak_column_f32(events, "PFCandidatesTau.dpt_vsum", dpt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesTau.dpt_hsum", dpt_hsum_results)
events = set_ak_column_i8(events, "PFCandidatesTau.decaymode_charge", decaymode_charge)
events = set_ak_column_i8(events, "PFCandidatesTau.abs_charge_sum_pfcand", abs_charge_sum_pfcand)

return events


@producer(
uses={
"Jet.*", "PFCandidate.*", "PFCandidateIndices.jet",
},
produces={
f"PFCandidatesJet.{name}" for name in [
"pt_vsum_pfcand", "pt_hsum_pfcand", "dpt_vsum", "dpt_hsum", "ncand_per_part",
]
},
)
def jetcand(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
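    """
    Produces PF-candidate based features for each jet: candidate multiplicity and
    vectorial and scalar pt sums of the matched candidates.
    """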
from coffea.nanoevents.methods import vector

jet = events.Jet
pfcand_indices = events.PFCandidateIndices.jet
pfcand = events.PFCandidate
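    # wrap the PF candidates into PtEtaPhiM four-vectors so that kinematic sums can be built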
pfcand = ak.zip(
{
"pt": pfcand.pt,
"eta": pfcand.eta,
"phi": pfcand.phi,
"mass": pfcand.mass,
},
with_name="PtEtaPhiMLorentzVector",
behavior=vector.behavior,
)
    # fixed per-jet index used for matching, analogous to tau_index in taucand above
    jet_index = ak.local_index(jet)
    pt_vsum_results = ak.local_index(jet)
    pt_hsum_results = ak.local_index(jet)
    ncand_results = ak.local_index(jet)

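    # loop over all per-event jet indices and aggregate the candidates matched to each index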
    for i in range(ak.max(jet_index) + 1):
decay_prod = pfcand[pfcand_indices == i]
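        # vectorial sum of the candidate four-momenta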
vec_sum = ak.zip(
{
"x": ak.sum(decay_prod.x, axis=-1),
"y": ak.sum(decay_prod.y, axis=-1),
"z": ak.sum(decay_prod.z, axis=-1),
"t": ak.sum(decay_prod.t, axis=-1),
},
with_name="LorentzVector",
behavior=vector.behavior,
)
hsum = ak.sum(decay_prod.pt, axis=-1)
        pt_vsum_results = ak.where(jet_index == i, vec_sum.pt, pt_vsum_results)
        pt_hsum_results = ak.where(jet_index == i, hsum, pt_hsum_results)
        ncand_results = ak.where(jet_index == i, ak.num(decay_prod), ncand_results)

    # absolute difference between the jet pt and the vectorial / scalar candidate pt sums
dpt_vsum_results = abs(jet.pt - pt_vsum_results)
dpt_hsum_results = abs(jet.pt - pt_hsum_results)

events = set_ak_column_f32(events, "PFCandidatesJet.pt_vsum_pfcand", pt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesJet.pt_hsum_pfcand", pt_hsum_results)
events = set_ak_column_i8(events, "PFCandidatesJet.ncand_per_part", ncand_results)
events = set_ak_column_f32(events, "PFCandidatesJet.dpt_vsum", dpt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesJet.dpt_hsum", dpt_hsum_results)

return events


@producer(
uses={
default, jetcand, taucand,
},
produces={
default, jetcand, taucand,
},
)
def merged_producer(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
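    """
    Convenience producer that chains the default producer with the jet and tau
    PF-candidate producers defined above.
    """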

events = self[default](events, **kwargs)
events = self[jetcand](events, **kwargs)
events = self[taucand](events, **kwargs)

return events
101 changes: 101 additions & 0 deletions hbt/selection/default_ana.py
@@ -0,0 +1,101 @@
# coding: utf-8

"""
Default analysis selector combining trigger, lepton, and jet selection.
"""

from operator import and_
from functools import reduce
from collections import defaultdict

from columnflow.selection import Selector, SelectionResult, selector
from columnflow.selection.stats import increment_stats
from columnflow.production.processes import process_ids
from columnflow.production.categories import category_ids

from columnflow.production.cms.mc_weight import mc_weight
from columnflow.production.util import attach_coffea_behavior
from columnflow.util import maybe_import, dev_sandbox
from hbt.selection.trigger import trigger_selection
from hbt.selection.lepton import lepton_selection
from hbt.production.features import cutflow_features
from hbt.selection.jet import jet_selection

np = maybe_import("numpy")
ak = maybe_import("awkward")


@selector(
uses={
process_ids, mc_weight, increment_stats, cutflow_features, trigger_selection,
lepton_selection, attach_coffea_behavior, category_ids, jet_selection,
},
produces={
process_ids, mc_weight, cutflow_features, trigger_selection, jet_selection,
lepton_selection, category_ids,
},
sandbox=dev_sandbox("bash::$HBT_BASE/sandboxes/venv_columnar_tf.sh"),
exposed=True,
)
def default(
self: Selector,
events: ak.Array,
stats: defaultdict,
**kwargs,
) -> tuple[ak.Array, SelectionResult]:
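    """
    Default event selection: trigger, lepton, and jet selection steps are applied in
    sequence, their masks are combined into a single event mask, process and category
    ids are attached, and the selection statistics are incremented.
    """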
# ensure coffea behavior
events = self[attach_coffea_behavior](events, **kwargs)

# add corrected mc weights
if self.dataset_inst.is_mc:
events = self[mc_weight](events, **kwargs)

# prepare the selection results that are updated at every step
results = SelectionResult()

# trigger selection
events, trigger_results = self[trigger_selection](events, **kwargs)
results += trigger_results

# lepton selection
events, lepton_results = self[lepton_selection](events, trigger_results, **kwargs)
results += lepton_results

# jet selection
events, jet_results = self[jet_selection](events, trigger_results, lepton_results, **kwargs)
results += jet_results

# combined event selection after all steps
event_sel = reduce(and_, results.steps.values())
results.event = event_sel

# write out process/category IDs
events = self[process_ids](events, **kwargs)
events = self[category_ids](events, **kwargs)

# increment stats
weight_map = {
"num_events": Ellipsis,
"num_events_selected": Ellipsis,
}
if self.dataset_inst.is_mc:
weight_map["sum_mc_weight"] = events.mc_weight
weight_map["sum_mc_weight_selected"] = (events.mc_weight, Ellipsis)

group_map = {
# per process
"process": {
"values": events.process_id,
"mask_fn": (lambda v: events.process_id == v),
},
}
events, results = self[increment_stats](
events,
results,
stats,
weight_map=weight_map,
group_map=group_map,
**kwargs,
)

return events, results
