-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add known forth for ATLAS (#1282)
* add test for known forth of 2D vector ElementLinks * add known forth for 2D vector ElementLinks * style: pre-commit fixes * rename test file * update form instead of replacing (fixes tests for assumed forms) * add treatment for branch=None in known_forth discovery * switch to hardcoded dict of typenames * delay awkward import * make PrimaryVerticesAuxDyn.neutralParticleLinks work * make if-statement for known_forth and awkward_form more clear Co-authored-by: Jim Pivarski <[email protected]> * add xAOD::MuonSegment_v1 to dict of known_forth with VectorVectorElementLink and sort keys * always convert self._form from dict in AsObjects.awkward_form * treat more cases for typename * flatten directory structure and add docstring for VectorVectorElementLink --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski <[email protected]>
- Loading branch information
1 parent
95b998b
commit 7b39448
Showing
4 changed files
with
208 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE | ||
|
||
""" | ||
This module provides known forth code and awkward forms for types for which they are known a priori. | ||
See :doc:`uproot.interpretation.known_forth.known_forth_of` for the function | ||
that provides the lookup of known forth codes and :doc:`uproot.interpretation.known_forth.atlas.VectorVectorElementLink` for an | ||
implementation used in ATLAS (D)AODs. | ||
""" | ||
from __future__ import annotations | ||
|
||
import uproot | ||
from uproot.interpretation.known_forth.atlas import VectorVectorElementLink | ||
|
||
# Maps a C++ typename to the class that supplies its known forth code and
# awkward form. Every typename listed here is handled by the same class, so the
# table is built from the key list alone (insertion order is preserved).
KNOWN_FORTH_DICT = dict.fromkeys(
    (
        "std::vector<std::vector<ElementLink<DataVector<xAOD::CaloCluster_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::IParticle>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::MuonSegment_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::NeutralParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TauTrack_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TrackParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TruthParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::Vertex_v1>>>>",
    ),
    VectorVectorElementLink,
)
|
||
|
||
def known_forth_of(model):
    """
    Args:
        model: The :doc:`uproot.model.Model` to look up known forth for

    Returns an object with attributes `forth_code` and `awkward_form` if a known
    special case exists, else None

    The typename is taken from ``model.typename``, falling back to
    ``model.classname`` and finally to decoding ``model.__name__``.
    """
    try:
        typename = model.typename
    except AttributeError:
        try:
            typename = model.classname
        except AttributeError:
            # classname_decode returns a (classname, version) pair; only the
            # C++ class name can match a key of KNOWN_FORTH_DICT.
            typename, _ = uproot.model.classname_decode(model.__name__)

    known_forth = KNOWN_FORTH_DICT.get(typename)
    if known_forth is None:
        return None

    # The registered class is instantiated with the typename it was found under.
    return known_forth(typename)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE | ||
|
||
""" | ||
This module defines ATLAS-specific known forth code. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import re | ||
|
||
|
||
class VectorVectorElementLink:
    """
    Known forth and awkward form for ``std::vector<std::vector<ElementLink<T>>>``
    types in ATLAS (D)AODs.

    The forth code was adjusted from what was provided in
    ``branch._complete_forth_code`` after running ``.array()`` once.

    The binary data of one ``vector<vector<ElementLink<T>>>`` looks as follows:

    * 6 bytes header for the outer vector
    * 4 bytes big endian uint for the size of the outer vector (node1)
    * for each outer vector element:
        * 4 bytes big endian uint for the size of the inner vector (node2)
        * for each inner vector element:
            * 20 bytes header for the ElementLink object
            * 4 bytes big endian uint for the ``m_persKey`` member (node3)
            * 4 bytes big endian uint for the ``m_persIndex`` member (node4)
    """

    # One pass per entry: seek to the entry, skip the 6-byte header, then read
    # the outer size, each inner size, and the two uint32 members per link.
    forth_code = """
input stream
input byteoffsets
input bytestops
output node1-offsets int64
output node2-offsets int64
output node3-data uint32
output node4-data uint32
0 node1-offsets <- stack
0 node2-offsets <- stack
0 do
byteoffsets I-> stack
stream seek
6 stream skip
stream !I-> stack
dup node1-offsets +<- stack
0 do
stream !I-> stack
dup node2-offsets +<- stack
0 do
20 stream skip
stream !I-> node3-data
stream !I-> node4-data
loop
loop
loop
"""

    def __init__(self, typename):
        # Strip the two enclosing std::vector<...> layers; a typename that does
        # not contain the pattern is kept unchanged by the substitution.
        two_vectors = re.compile("std::vector<std::vector<(.*)>>")
        self.typename = typename
        self.inner_typename = two_vectors.sub(r"\1", typename)

    @property
    def awkward_form(self):
        """
        A freshly built awkward form (as a dict): two nested list levels around
        a record with the fields ``m_persKey`` and ``m_persIndex``.
        """

        def uint32_leaf(form_key):
            return {
                "class": "NumpyArray",
                "primitive": "uint32",
                "inner_shape": [],
                "parameters": {},
                "form_key": form_key,
            }

        def list_of(form_key, content):
            return {
                "class": "ListOffsetArray",
                "offsets": "i64",
                "form_key": form_key,
                "content": content,
            }

        element_link = {
            "class": "RecordArray",
            "fields": ["m_persKey", "m_persIndex"],
            "contents": [uint32_leaf("node3"), uint32_leaf("node4")],
            "parameters": {"__record__": f"{self.inner_typename}"},
        }
        return list_of("node1", list_of("node2", element_link))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import awkward | ||
import pytest | ||
import skhep_testdata | ||
import uproot | ||
|
||
# Branch names exercised by the tests in this file; each one is spelled
# "<container>AuxDyn.<member>".
VECTOR_VECTOR_ELEMENTLINK_BRANCHES = [
    f"{container}AuxDyn.{member}"
    for container, member in (
        ("AnalysisHLT_e12_lhloose_nod0_2mu10", "TrigMatchedObjects"),
        ("AnalysisElectrons", "caloClusterLinks"),
        ("AnalysisPhotons", "vertexLinks"),
        ("TruthMuons", "childLinks"),
        ("AnalysisElectrons", "trackParticleLinks"),
        ("PrimaryVertices", "neutralParticleLinks"),
        ("AnalysisTauJets", "tauTrackLinks"),
    )
]
|
||
|
||
@pytest.mark.parametrize("key", VECTOR_VECTOR_ELEMENTLINK_BRANCHES)
def test_pickup_vector_vector_elementlink(key):
    """The interpretation of each listed branch carries forth code and a form."""
    source = {skhep_testdata.data_path("uproot-issue-123a.root"): "CollectionTree"}
    with uproot.open(source) as tree:
        branch = tree[key]
        assert branch.interpretation._complete_forth_code is not None
        assert branch.interpretation._form is not None
|
||
|
||
def test_consistent_library_np_vector_vector_elementlink():
    """
    Reading with ``library="np"`` and converting to awkward must give the same
    field values as reading the branch as an awkward array directly.
    """

    def open_collection_tree():
        # Resolved anew for every call, so the file is opened once per read mode.
        return uproot.open(
            {skhep_testdata.data_path("uproot-issue-123a.root"): "CollectionTree"}
        )

    with open_collection_tree() as tree:
        arrays_np = {
            key: tree[key].array(library="np")
            for key in VECTOR_VECTOR_ELEMENTLINK_BRANCHES
        }

    with open_collection_tree() as tree:
        arrays_ak = {key: tree[key].array() for key in VECTOR_VECTOR_ELEMENTLINK_BRANCHES}

    for key, from_numpy in arrays_np.items():
        expected = arrays_ak[key]
        # Rebuild an awkward array from the numpy objects using the form of the
        # directly-read awkward array.
        converted = uproot.interpretation.library._object_to_awkward_array(
            awkward, expected.layout.form.to_dict(), from_numpy
        )
        for field in expected.fields:
            assert awkward.all(converted[field] == expected[field])