Skip to content

Commit

Permalink
feat: Add known forth for ATLAS (#1282)
Browse files Browse the repository at this point in the history
* add test for known forth of 2D vector ElementLinks

* add known forth for 2D vector ElementLinks

* style: pre-commit fixes

* rename test file

* update form instead of replacing (fixes tests for assumed forms)

* add treatment for branch=None in known_forth discovery

* switch to hardcoded dict of typenames

* delay awkward import

* make PrimaryVerticesAuxDyn.neutralParticleLinks work

* make if-statement for known_forth and awkward_form more clear

Co-authored-by: Jim Pivarski <[email protected]>

* add xAOD::MuonSegment_v1 to dict of known_forth with VectorVectorElementLink and sort keys

* always convert self._form from dict in AsObjects.awkward_form

* treat more cases for typename

* flatten directory structure and add docstring for VectorVectorElementLink

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jim Pivarski <[email protected]>
  • Loading branch information
3 people authored Nov 8, 2024
1 parent 95b998b commit 7b39448
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 2 deletions.
46 changes: 46 additions & 0 deletions src/uproot/interpretation/known_forth/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

"""
This module provides known forth code and awkward forms for types where it is known a priori.
See :doc:`uproot.interpretation.known_forth.known_forth_of` for the function
that provides the lookup of known forth codes and :doc:`uproot.interpretation.known_forth.atlas.VectorVectorElementLink` for an
implementation used in ATLAS (D)AODs.
"""
from __future__ import annotations

import uproot
from uproot.interpretation.known_forth.atlas import VectorVectorElementLink

# Lookup table used by ``known_forth_of``: C++ typename -> class providing the
# known forth code and awkward form for that typename. Keys are kept sorted.
KNOWN_FORTH_DICT = dict.fromkeys(
    (
        "std::vector<std::vector<ElementLink<DataVector<xAOD::CaloCluster_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::IParticle>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::MuonSegment_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::NeutralParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TauTrack_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TrackParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TruthParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::Vertex_v1>>>>",
    ),
    VectorVectorElementLink,
)


def known_forth_of(model):
    """
    Args:
        model: The :doc:`uproot.model.Model` to look up known forth for

    Returns an object with attributes `forth_code` and `awkward_form` if a known
    special case exists, else None

    The typename used for the lookup is taken from ``model.typename``, then
    ``model.classname``, and finally decoded from ``model.__name__``.
    """
    try:
        typename = model.typename
    except AttributeError:
        try:
            typename = model.classname
        except AttributeError:
            decoded = uproot.model.classname_decode(model.__name__)
            # NOTE(review): classname_decode is documented to return a
            # (classname, version) pair; a tuple can never match the string
            # keys of KNOWN_FORTH_DICT, so only the classname is kept.
            typename = decoded[0] if isinstance(decoded, tuple) else decoded

    # Single lookup; every value in the table is a class, so None means "unknown".
    known_forth = KNOWN_FORTH_DICT.get(typename)
    if known_forth is None:
        return None

    return known_forth(typename)
99 changes: 99 additions & 0 deletions src/uproot/interpretation/known_forth/atlas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

"""
This module defines ATLAS-specific known forth code.
"""

from __future__ import annotations

import re


class VectorVectorElementLink:
    """
    Known forth and awkward form for ``std::vector<std::vector<ElementLink<T>>>`` types in ATLAS (D)AODs

    The forth code was adjusted from what was provided in
    ``branch._complete_forth_code`` after running ``.array()`` once.

    The binary data of one ``vector<vector<ElementLink<T>>>`` looks as follows:

    * 6 bytes header for the outer vector
    * 4 bytes big endian uint for the size of the outer vector (node1)
    * for each outer vector element:
        * 4 bytes big endian uint for the size of the inner vector (node2)
        * for each inner vector element:
            * 20 bytes header for the ElementLink object
            * 4 bytes big endian uint for the ``m_persKey`` member (node3)
            * 4 bytes big endian uint for the ``m_persIndex`` member (node4)

    Args:
        typename (str): The full C++ typename; stored as ``typename``. The part
            wrapped by ``std::vector<std::vector<...>>`` is stored as
            ``inner_typename`` (the whole typename if that wrapper is absent).
    """

    # Greedy capture of whatever sits inside ``std::vector<std::vector<...>>``.
    _INNER_TYPENAME_PATTERN = re.compile(r"std::vector<std::vector<(.*)>>")

    # Forth source as a runtime string. The three nested ``do``/``loop`` pairs
    # mirror the layout in the class docstring (the outermost one presumably
    # runs once per entry -- its limit is not pushed in this snippet).
    forth_code = """
input stream
input byteoffsets
input bytestops
output node1-offsets int64
output node2-offsets int64
output node3-data uint32
output node4-data uint32
0 node1-offsets <- stack
0 node2-offsets <- stack
0 do
byteoffsets I-> stack
stream seek
6 stream skip
stream !I-> stack
dup node1-offsets +<- stack
0 do
stream !I-> stack
dup node2-offsets +<- stack
0 do
20 stream skip
stream !I-> node3-data
stream !I-> node4-data
loop
loop
loop
"""

    def __init__(self, typename):
        self.typename = typename
        # ``sub`` leaves a non-matching typename untouched.
        self.inner_typename = self._INNER_TYPENAME_PATTERN.sub(r"\1", self.typename)

    @staticmethod
    def _uint32_leaf(form_key):
        """Return the form dict of a flat ``uint32`` NumpyArray with the given ``form_key``."""
        return {
            "class": "NumpyArray",
            "primitive": "uint32",
            "inner_shape": [],
            "parameters": {},
            "form_key": form_key,
        }

    @property
    def awkward_form(self):
        """
        The awkward form as a plain dict: two nested ``ListOffsetArray`` levels
        (``node1``, ``node2``) around a ``RecordArray`` with the ``uint32``
        fields ``m_persKey`` (``node3``) and ``m_persIndex`` (``node4``).

        A new dict is built on every access.
        """
        return {
            "class": "ListOffsetArray",
            "offsets": "i64",
            "form_key": "node1",
            "content": {
                "class": "ListOffsetArray",
                "offsets": "i64",
                "form_key": "node2",
                "content": {
                    "class": "RecordArray",
                    "fields": ["m_persKey", "m_persIndex"],
                    "contents": [
                        self._uint32_leaf("node3"),
                        self._uint32_leaf("node4"),
                    ],
                    "parameters": {"__record__": self.inner_typename},
                },
            },
        }
17 changes: 15 additions & 2 deletions src/uproot/interpretation/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import uproot
import uproot._awkwardforth
from uproot.interpretation.known_forth import known_forth_of


class AsObjects(uproot.interpretation.Interpretation):
Expand All @@ -45,14 +46,22 @@ class AsObjects(uproot.interpretation.Interpretation):
:ref:`uproot.interpretation.objects.AsObjects.simplify` attempts to
replace this interpretation with a faster-to-read equivalent, but not all
data types can be simplified.
:doc:`uproot.interpretation.known_forth` defines forth code and forms for
special cases that will be picked up here as well
"""

def __init__(self, model, branch=None):
    """
    Args:
        model: Stored as ``self._model`` and passed to ``known_forth_of`` to
            look up pre-written forth code and form.
        branch: Stored as ``self._branch``; defaults to None.
    """
    self._model = model
    self._branch = branch
    self._forth = True
    # Known special cases start out with their forth code and form already
    # filled in; everything else starts with both unset.
    known_forth = known_forth_of(self._model)
    if known_forth is None:
        self._complete_forth_code = None
        self._form = None
    else:
        self._complete_forth_code = known_forth.forth_code
        self._form = known_forth.awkward_form
    self._forth_lock = threading.Lock()

@property
Expand Down Expand Up @@ -122,6 +131,10 @@ def awkward_form(
tobject_header=False,
breadcrumbs=(),
):
if self._form is not None:
awkward = uproot.extras.awkward()
return awkward.forms.from_dict(self._form)

context = self._make_context(
context, index_format, header, tobject_header, breadcrumbs
)
Expand Down
48 changes: 48 additions & 0 deletions tests/test_1282_add_known_forth_for_atlas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python3

import awkward
import pytest
import skhep_testdata
import uproot

# Branch names that the tests in this file read from the "CollectionTree" tree
# of uproot-issue-123a.root.
# NOTE(review): presumably all of these have a
# std::vector<std::vector<ElementLink<...>>> type -- confirm against the file.
VECTOR_VECTOR_ELEMENTLINK_BRANCHES = [
    "AnalysisHLT_e12_lhloose_nod0_2mu10AuxDyn.TrigMatchedObjects",
    "AnalysisElectronsAuxDyn.caloClusterLinks",
    "AnalysisPhotonsAuxDyn.vertexLinks",
    "TruthMuonsAuxDyn.childLinks",
    "AnalysisElectronsAuxDyn.trackParticleLinks",
    "PrimaryVerticesAuxDyn.neutralParticleLinks",
    "AnalysisTauJetsAuxDyn.tauTrackLinks",
]


@pytest.mark.parametrize("key", VECTOR_VECTOR_ELEMENTLINK_BRANCHES)
def test_pickup_vector_vector_elementlink(key):
    """The interpretation of branch ``key`` has forth code and form set before any read."""
    source = {skhep_testdata.data_path("uproot-issue-123a.root"): "CollectionTree"}
    with uproot.open(source) as tree:
        branch = tree[key]
        for attribute in ("_complete_forth_code", "_form"):
            assert getattr(branch.interpretation, attribute) is not None


def test_consistent_library_np_vector_vector_elementlink():
    """Arrays read with ``library="np"`` agree field by field with the default read."""

    def read_all(**array_options):
        # Each call opens the file afresh, so the two reads share no tree object.
        source = {skhep_testdata.data_path("uproot-issue-123a.root"): "CollectionTree"}
        with uproot.open(source) as tree:
            return {
                key: tree[key].array(**array_options)
                for key in VECTOR_VECTOR_ELEMENTLINK_BRANCHES
            }

    arrays_np = read_all(library="np")
    arrays_ak = read_all()

    for key, objects_np in arrays_np.items():
        array_ak = arrays_ak[key]
        # Convert the numpy object array using the form of the awkward result.
        array_np = uproot.interpretation.library._object_to_awkward_array(
            awkward, array_ak.layout.form.to_dict(), objects_np
        )
        for field in array_ak.fields:
            assert awkward.all(array_np[field] == array_ak[field])

0 comments on commit 7b39448

Please sign in to comment.