Commit

added new producer for constituents
haddadanas committed May 23, 2024
1 parent 5dd6a35 commit 4efac7c
Showing 2 changed files with 288 additions and 0 deletions.
187 changes: 187 additions & 0 deletions hbt/production/extra_features.py
@@ -0,0 +1,187 @@
# coding: utf-8

"""
Column production methods related to higher-level features.
"""

import functools

from columnflow.production import Producer, producer
from columnflow.util import maybe_import
from columnflow.columnar_util import set_ak_column

from hbt.production.default import default

np = maybe_import("numpy")
ak = maybe_import("awkward")

# helpers
set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32)
set_ak_column_i8 = functools.partial(set_ak_column, value_type=np.int8)


@producer(
uses={
"Tau.*", "PFCandidate.*", "PFCandidateIndices.tau",
},
produces={
f"PFCandidatesTau.{name}" for name in [
"charge_sum_pfcand", "ncand_per_part",
"pt_vsum_pfcand", "pt_hsum_pfcand", "dpt_vsum", "dpt_hsum",
"decaymode_charge", "abs_charge_sum_pfcand",
]
},
)
def taucand(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
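    """
    Produces PF-candidate based features for each tau: charge sums, candidate
    multiplicity, vectorial and scalar pt sums, and the expected prong count
    derived from the decay mode.
    """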

from coffea.nanoevents.methods import vector

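    # expected number of charged prongs for each (HPS) tau decay mode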
decaymodes = {
0: 1,
1: 1,
2: 1,
5: 2,
6: 2,
7: 2,
10: 3,
11: 3,
}

tau = events.Tau
pfcand_indices = events.PFCandidateIndices.tau
pfcand = events.PFCandidate
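    # wrap the PF candidates into PtEtaPhiM four-vectors so that kinematic sums can be built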
pfcand = ak.zip(
{
"pt": pfcand.pt,
"eta": pfcand.eta,
"phi": pfcand.phi,
"mass": pfcand.mass,
"charge": pfcand.charge,
},
with_name="PtEtaPhiMLorentzVector",
behavior=vector.behavior,
)

    # fixed per-tau index used for matching; the result arrays start from it as well,
    # but the masking below is done against tau_index so that already-computed values
    # cannot collide with a later index
    tau_index = ak.local_index(tau)
    charge_results = ak.local_index(tau)
    ncand_results = ak.local_index(tau)
    pt_vsum_results = ak.local_index(tau)
    pt_hsum_results = ak.local_index(tau)
    decaymode_charge = ak.local_index(tau)
    abs_charge_sum_pfcand = ak.local_index(tau)

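    # loop over all per-event tau indices and aggregate the candidates matched to each index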
    for i in range(ak.max(tau_index) + 1):
decay_prod = pfcand[pfcand_indices == i]
charge_sum = ak.sum(decay_prod.charge, axis=-1)
        abs_charge_sum = ak.sum(abs(decay_prod.charge), axis=-1)
        charge_results = ak.where(tau_index == i, charge_sum, charge_results)
        abs_charge_sum_pfcand = ak.where(tau_index == i, abs_charge_sum, abs_charge_sum_pfcand)
        ncand_results = ak.where(tau_index == i, ak.num(decay_prod), ncand_results)
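        # vectorial sum of the candidate four-momenta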
vec_sum = ak.zip(
{
"x": ak.sum(decay_prod.x, axis=-1),
"y": ak.sum(decay_prod.y, axis=-1),
"z": ak.sum(decay_prod.z, axis=-1),
"t": ak.sum(decay_prod.t, axis=-1),
},
with_name="LorentzVector",
behavior=vector.behavior,
)
hsum = ak.sum(decay_prod.pt, axis=-1)
        pt_vsum_results = ak.where(tau_index == i, vec_sum.pt, pt_vsum_results)
        pt_hsum_results = ak.where(tau_index == i, hsum, pt_hsum_results)

    # absolute difference between the tau pt and the vectorial / scalar candidate pt sums
dpt_vsum_results = abs(tau.pt - pt_vsum_results)
dpt_hsum_results = abs(tau.pt - pt_hsum_results)

    # expected number of charged prongs inferred from the reconstructed decay mode
for mode, charge in decaymodes.items():
decaymode_charge = ak.where(tau.decayMode == mode, charge, decaymode_charge)

events = set_ak_column_i8(events, "PFCandidatesTau.charge_sum_pfcand", charge_results)
events = set_ak_column_i8(events, "PFCandidatesTau.ncand_per_part", ncand_results)
events = set_ak_column_f32(events, "PFCandidatesTau.pt_vsum_pfcand", pt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesTau.pt_hsum_pfcand", pt_hsum_results)
events = set_ak_column_f32(events, "PFCandidatesTau.dpt_vsum", dpt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesTau.dpt_hsum", dpt_hsum_results)
events = set_ak_column_i8(events, "PFCandidatesTau.decaymode_charge", decaymode_charge)
events = set_ak_column_i8(events, "PFCandidatesTau.abs_charge_sum_pfcand", abs_charge_sum_pfcand)

return events


@producer(
uses={
"Jet.*", "PFCandidate.*", "PFCandidateIndices.jet",
},
produces={
f"PFCandidatesJet.{name}" for name in [
"pt_vsum_pfcand", "pt_hsum_pfcand", "dpt_vsum", "dpt_hsum", "ncand_per_part",
]
},
)
def jetcand(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
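    """
    Produces PF-candidate based features for each jet: candidate multiplicity and
    vectorial and scalar pt sums of the matched candidates.
    """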
from coffea.nanoevents.methods import vector

jet = events.Jet
pfcand_indices = events.PFCandidateIndices.jet
pfcand = events.PFCandidate
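    # wrap the PF candidates into PtEtaPhiM four-vectors so that kinematic sums can be built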
pfcand = ak.zip(
{
"pt": pfcand.pt,
"eta": pfcand.eta,
"phi": pfcand.phi,
"mass": pfcand.mass,
},
with_name="PtEtaPhiMLorentzVector",
behavior=vector.behavior,
)
    # fixed per-jet index used for matching, analogous to tau_index in taucand above
    jet_index = ak.local_index(jet)
    pt_vsum_results = ak.local_index(jet)
    pt_hsum_results = ak.local_index(jet)
    ncand_results = ak.local_index(jet)

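    # loop over all per-event jet indices and aggregate the candidates matched to each index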
    for i in range(ak.max(jet_index) + 1):
decay_prod = pfcand[pfcand_indices == i]
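        # vectorial sum of the candidate four-momenta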
vec_sum = ak.zip(
{
"x": ak.sum(decay_prod.x, axis=-1),
"y": ak.sum(decay_prod.y, axis=-1),
"z": ak.sum(decay_prod.z, axis=-1),
"t": ak.sum(decay_prod.t, axis=-1),
},
with_name="LorentzVector",
behavior=vector.behavior,
)
hsum = ak.sum(decay_prod.pt, axis=-1)
        pt_vsum_results = ak.where(jet_index == i, vec_sum.pt, pt_vsum_results)
        pt_hsum_results = ak.where(jet_index == i, hsum, pt_hsum_results)
        ncand_results = ak.where(jet_index == i, ak.num(decay_prod), ncand_results)

    # absolute difference between the jet pt and the vectorial / scalar candidate pt sums
dpt_vsum_results = abs(jet.pt - pt_vsum_results)
dpt_hsum_results = abs(jet.pt - pt_hsum_results)

events = set_ak_column_f32(events, "PFCandidatesJet.pt_vsum_pfcand", pt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesJet.pt_hsum_pfcand", pt_hsum_results)
events = set_ak_column_i8(events, "PFCandidatesJet.ncand_per_part", ncand_results)
events = set_ak_column_f32(events, "PFCandidatesJet.dpt_vsum", dpt_vsum_results)
events = set_ak_column_f32(events, "PFCandidatesJet.dpt_hsum", dpt_hsum_results)

return events


@producer(
uses={
default, jetcand, taucand,
},
produces={
default, jetcand, taucand,
},
)
def merged_producer(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
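    """
    Convenience producer that chains the default producer with the jet and tau
    PF-candidate producers defined above.
    """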

events = self[default](events, **kwargs)
events = self[jetcand](events, **kwargs)
events = self[taucand](events, **kwargs)

return events
101 changes: 101 additions & 0 deletions hbt/selection/default_ana.py
@@ -0,0 +1,101 @@
# coding: utf-8

"""
Default analysis selector combining trigger, lepton, and jet selection.
"""

from operator import and_
from functools import reduce
from collections import defaultdict

from columnflow.selection import Selector, SelectionResult, selector
from columnflow.selection.stats import increment_stats
from columnflow.production.processes import process_ids
from columnflow.production.categories import category_ids

from columnflow.production.cms.mc_weight import mc_weight
from columnflow.production.util import attach_coffea_behavior
from columnflow.util import maybe_import, dev_sandbox
from hbt.selection.trigger import trigger_selection
from hbt.selection.lepton import lepton_selection
from hbt.production.features import cutflow_features
from hbt.selection.jet import jet_selection

np = maybe_import("numpy")
ak = maybe_import("awkward")


@selector(
uses={
process_ids, mc_weight, increment_stats, cutflow_features, trigger_selection,
lepton_selection, attach_coffea_behavior, category_ids, jet_selection,
},
produces={
process_ids, mc_weight, cutflow_features, trigger_selection, jet_selection,
lepton_selection, category_ids,
},
sandbox=dev_sandbox("bash::$HBT_BASE/sandboxes/venv_columnar_tf.sh"),
exposed=True,
)
def default(
self: Selector,
events: ak.Array,
stats: defaultdict,
**kwargs,
) -> tuple[ak.Array, SelectionResult]:
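    """
    Default event selection: trigger, lepton, and jet selection steps are applied in
    sequence, their masks are combined into a single event mask, process and category
    ids are attached, and the selection statistics are incremented.
    """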
# ensure coffea behavior
events = self[attach_coffea_behavior](events, **kwargs)

# add corrected mc weights
if self.dataset_inst.is_mc:
events = self[mc_weight](events, **kwargs)

# prepare the selection results that are updated at every step
results = SelectionResult()

# trigger selection
events, trigger_results = self[trigger_selection](events, **kwargs)
results += trigger_results

# lepton selection
events, lepton_results = self[lepton_selection](events, trigger_results, **kwargs)
results += lepton_results

# jet selection
events, jet_results = self[jet_selection](events, trigger_results, lepton_results, **kwargs)
results += jet_results

# combined event selection after all steps
event_sel = reduce(and_, results.steps.values())
results.event = event_sel

# write out process/category IDs
events = self[process_ids](events, **kwargs)
events = self[category_ids](events, **kwargs)

# increment stats
weight_map = {
"num_events": Ellipsis,
"num_events_selected": Ellipsis,
}
if self.dataset_inst.is_mc:
weight_map["sum_mc_weight"] = events.mc_weight
weight_map["sum_mc_weight_selected"] = (events.mc_weight, Ellipsis)

group_map = {
# per process
"process": {
"values": events.process_id,
"mask_fn": (lambda v: events.process_id == v),
},
}
events, results = self[increment_stats](
events,
results,
stats,
weight_map=weight_map,
group_map=group_map,
**kwargs,
)

return events, results
