diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 368d295a8..f9ecf576e 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,5 +5,3 @@ updates:
directory: "/"
schedule:
interval: "monthly"
- ignore:
- - dependency-name: "actions/*"
diff --git a/.github/logo.png b/.github/logo.png
new file mode 100644
index 000000000..36aec659a
Binary files /dev/null and b/.github/logo.png differ
diff --git a/.github/workflows/distribution.yml b/.github/workflows/distribution.yml
index 17749536a..61aa724a4 100644
--- a/.github/workflows/distribution.yml
+++ b/.github/workflows/distribution.yml
@@ -2,42 +2,47 @@ name: distribute
on:
workflow_dispatch:
+ pull_request:
push:
- tags: "v*"
+ branches:
+ - main
release:
types:
- published
-jobs:
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+env:
+ FORCE_COLOR: 3
+jobs:
dist:
+ name: Distribution build
runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- with:
- fetch-depth: 0
- - name: Build SDist and wheel
- run: pipx run build
-
- - uses: actions/upload-artifact@v3
- with:
- path: dist/*
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- - name: Check metadata
- run: pipx run twine check dist/*
+ - uses: hynek/build-and-inspect-python-package@v2
publish:
needs: [dist]
+ name: Publish to PyPI
+ environment: pypi
+ permissions:
+ id-token: write
runs-on: ubuntu-latest
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- - uses: actions/download-artifact@v3
- with:
- name: artifact
- path: dist
-
- - uses: pypa/gh-action-pypi-publish@v1.8.10
- with:
- password: ${{ secrets.pypi_password }}
+ - uses: actions/download-artifact@v4
+ with:
+ name: Packages
+ path: dist
+
+ - uses: pypa/gh-action-pypi-publish@release/v1
+ if: github.event_name == 'release' && github.event.action == 'published'
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 92ae646bb..76eb5c78b 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,17 +1,21 @@
name: pygama
on:
+ workflow_dispatch:
+ pull_request:
push:
branches:
- main
- - 'releases/**'
- pull_request:
- release:
+ - "releases/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
+env:
+ FORCE_COLOR: 3
+ TQDM_MININTERVAL: 100
+
jobs:
build-and-test:
@@ -24,9 +28,9 @@ jobs:
os: [ubuntu-latest, macOS-latest]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Get dependencies and install the package
@@ -35,16 +39,16 @@ jobs:
python -m pip install --upgrade .[test]
- name: Run unit tests
run: |
- pytest
+ python -m pytest
test-coverage:
name: Calculate and upload test coverage
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
fetch-depth: 2
- - uses: actions/setup-python@v2
+ - uses: actions/setup-python@v5
with:
python-version: '3.10'
@@ -54,7 +58,7 @@ jobs:
python -m pip install --upgrade .[test]
python -m pytest --cov=pygama --cov-report=xml
- name: Upload Coverage to codecov.io
- uses: codecov/codecov-action@v3
+ uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false
@@ -63,10 +67,10 @@ jobs:
name: Build documentation
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
fetch-depth: 0
- - uses: actions/setup-python@v2
+ - uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Setup build environment
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e8238d0d9..74f3d3661 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ ci:
exclude: ^(attic|tutorials|src/pygama/math|src/pygama/flow/datagroup.py)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: "v4.4.0"
+ rev: "v4.5.0"
hooks:
- id: check-added-large-files
- id: check-case-conflict
@@ -26,35 +26,35 @@ repos:
- id: trailing-whitespace
- repo: https://github.com/asottile/setup-cfg-fmt
- rev: "v2.4.0"
+ rev: "v2.5.0"
hooks:
- id: setup-cfg-fmt
- repo: https://github.com/PyCQA/isort
- rev: "5.12.0"
+ rev: "5.13.2"
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
- rev: "v3.13.0"
+ rev: "v3.15.0"
hooks:
- id: pyupgrade
args: ["--py38-plus"]
- repo: https://github.com/psf/black
- rev: "23.9.1"
+ rev: "23.12.1"
hooks:
- id: black-jupyter
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: "v1.5.1"
+ rev: "v1.8.0"
hooks:
- id: mypy
files: src
stages: [manual]
- repo: https://github.com/hadialqattan/pycln
- rev: "v2.2.2"
+ rev: "v2.4.0"
hooks:
- id: pycln
exclude: ^src/pygama/pargen
@@ -70,6 +70,7 @@ repos:
flake8-print,
pep8-naming
]
+ args: ["--extend-ignore", "E203,E501"]
- repo: https://github.com/kynan/nbstripout
rev: "0.6.1"
@@ -85,9 +86,11 @@ repos:
stages: [manual]
- repo: https://github.com/codespell-project/codespell
- rev: "v2.2.5"
+ rev: "v2.2.6"
hooks:
- id: codespell
+ additional_dependencies:
+ - tomli
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: "v0.9.0.6"
@@ -103,7 +106,7 @@ repos:
- id: rst-inline-touching-normal
- repo: https://github.com/pre-commit/mirrors-prettier
- rev: "v3.0.3"
+ rev: "v4.0.0-alpha.8"
hooks:
- id: prettier
types_or: [json]
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 000000000..90524f881
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,58 @@
+cff-version: 1.2.0
+title: pygama
+doi: 10.5281/zenodo.10614246
+date-released: 2024-02-03
+url: https://github.com/legend-exp/pygama
+message: "If you use this software, please cite it as below."
+authors:
+ - family-names: Agostini
+ given-names: Matteo
+ orcid: https://orcid.org/0000-0003-1151-5301
+ - family-names: Detwiler
+ given-names: Jason
+ orcid: https://orcid.org/0000-0002-9050-4610
+ - family-names: Pertoldi
+ given-names: Luigi
+ orcid: https://orcid.org/0000-0002-0467-2571
+ - family-names: Guinn
+ given-names: Ian
+ orcid: https://orcid.org/0000-0002-2424-3272
+ - family-names: Marshall
+ given-names: George
+ orcid: https://orcid.org/0000-0002-5470-5132
+ - family-names: D'Andrea
+ given-names: Valerio
+ orcid: https://orcid.org/0000-0003-2037-4133
+ - family-names: Krause
+ given-names: Patrick
+ orcid: https://orcid.org/0000-0002-9603-7865
+ - family-names: Song
+ given-names: Grace
+ email: grsong@uw.edu
+ - family-names: Engelhardt
+ given-names: Erin
+ email: erin717@live.unc.edu
+ - family-names: Borden
+ given-names: Sam
+ orcid: https://orcid.org/0009-0003-2539-4333
+ - family-names: Deckert
+ given-names: Rosanna
+ orcid: https://orcid.org/0009-0006-0431-341X
+ - family-names: Sweigart
+ given-names: David
+ email: dsweigar@uw.edu
+ - family-names: Zschocke
+ given-names: Andreas
+ email: Andreas.Zschocke@uni-tuebingen.de
+ - family-names: Wiseman
+ given-names: Clint
+ orcid: https://orcid.org/0000-0002-4232-1326
+ - family-names: Mathew
+ given-names: Tim
+ email: tmathew@uoregon.edu
+ - family-names: Kermaïdic
+ given-names: Yoann
+ orcid: https://orcid.org/0000-0001-8007-8016
+ - family-names: Shanks
+ given-names: Ben
+ email: benjamin.shanks@gmail.com
diff --git a/README.md b/README.md
index e182a9c06..8abf4f7e5 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+
+
# pygama
[](https://pypi.org/project/pygama/)
@@ -10,6 +12,7 @@
[](https://github.com/legend-exp/pygama/pulls)
[](https://github.com/legend-exp/pygama/blob/main/LICENSE)
[](https://pygama.readthedocs.io)
+[](https://zenodo.org/doi/10.5281/zenodo.10614246)
*pygama* is a Python package for:
@@ -25,3 +28,15 @@
- generating and selecting high-level event data for further analysis
Check out the [online documentation](https://pygama.readthedocs.io).
+
+If you are using this software, consider
+[citing it](https://zenodo.org/doi/10.5281/zenodo.10614246)!
+
+## Related repositories
+
+- [legend-exp/legend-pydataobj](https://github.com/legend-exp/legend-pydataobj)
+ → LEGEND Python Data Objects
+- [legend-exp/legend-daq2lh5](https://github.com/legend-exp/legend-daq2lh5)
+ → Convert digitizer data to LEGEND HDF5
+- [legend-exp/dspeed](https://github.com/legend-exp/dspeed)
+ → Fast Digital Signal Processing for particle detector signals in Python
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a02d2d512..3bcd7d1fc 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -39,6 +39,7 @@
"source_directory": "docs/source",
}
html_title = f"{project} {version}"
+html_logo = "../../.github/logo.png"
# sphinx-napoleon
# enforce consistent usage of NumPy-style docstrings
@@ -57,9 +58,11 @@
"scipy": ("https://docs.scipy.org/doc/scipy", None),
"pandas": ("https://pandas.pydata.org/docs", None),
"matplotlib": ("https://matplotlib.org/stable", None),
- "iminuit": ("https://iminuit.readthedocs.io/en/stable", None),
"h5py": ("https://docs.h5py.org/en/stable", None),
"pint": ("https://pint.readthedocs.io/en/stable", None),
+ "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None),
+ "dspeed": ("https://dspeed.readthedocs.io/en/stable", None),
+ "daq2lh5": ("https://legend-daq2lh5.readthedocs.io/en/stable", None),
}
suppress_warnings = [
diff --git a/docs/source/developer.rst b/docs/source/developer.rst
index 5d3c85dcf..c27d05def 100644
--- a/docs/source/developer.rst
+++ b/docs/source/developer.rst
@@ -1,14 +1,23 @@
Developer's guide
=================
+.. note::
+
+   The https://learn.scientific-python.org webpages are an extremely valuable
+   learning resource for Python software developers. The reader is referred
+   there for any detail not covered in the following guide.
+
The following rules and conventions have been established for the package
development and are enforced throughout the entire code base. Merge requests
that do not comply to the following directives will be rejected.
To start developing :mod:`pygama`, fork the remote repository to your personal
-GitHub account (see `About Forks `_).
+GitHub account (see `About Forks
+`_).
If you have not set up your ssh keys on the computer you will be working on,
-please follow `GitHub's instructions `_. Once you have your own fork, you can clone it via
+please follow `GitHub's instructions
+`_.
+Once you have your own fork, you can clone it via
(replace "yourusername" with your GitHub username):
.. code-block:: console
@@ -21,7 +30,20 @@ dependencies and can be installed via pip by running:
.. code-block:: console
$ cd pygama
- $ pip install '.[all]' # single quotes are not needed on bash
+ $ pip install -e '.[all]' # single quotes are not needed on bash
+
+.. important::
+
+   Pip's ``--editable | -e`` flag lets you install the package in "developer
+   mode", meaning that any change to the source code is immediately reflected
+   in the installed package when it is imported in scripts.
+
+.. tip::
+
+   It is strongly recommended to work inside a virtual environment, which
+   guarantees reproducibility and isolation. For more details, see
+ `learn.scientific-python.org
+ `_.
Code style
----------
@@ -29,13 +51,6 @@ Code style
* All functions and methods (arguments and return types) must be
`type-annotated `_. Type
annotations for variables like class attributes are also highly appreciated.
- Do not forget to
-
- .. code-block:: python
-
- from __future__ import annotations
-
- at the top of a module implementation.
* Messaging to the user is managed through the :mod:`logging` module. Do not
add :func:`print` statements. To make a logging object available in a module,
add this:
@@ -48,7 +63,8 @@ Code style
at the top. In general, try to keep the number of :func:`logging.debug` calls
low and use informative messages. :func:`logging.info` calls should be
reserved for messages from high-level routines (like
- :func:`pygama.dsp.build_dsp`). Good code is never too verbose.
+  :func:`pygama.dsp.build_dsp`) and used sparingly. Good code is never too
+  verbose.
* If an error condition leading to undefined behavior occurs, raise an
exception. try to find the most suitable between the `built-in exceptions
`_, otherwise ``raise
@@ -63,18 +79,19 @@ The pre-commit tool is able to identify common style problems and automatically
fix them, wherever possible. Configured hooks are listed in the
``.pre-commit-config.yaml`` file at the project root folder. They are run
remotely on the GitHub repository through the `pre-commit bot
-`_, but can also be run locally before submitting a
-pull request (recommended):
+`_, but should also be run locally before submitting a
+pull request:
.. code-block:: console
$ cd pygama
$ pip install '.[test]'
$ pre-commit run --all-files # analyse the source code and fix it wherever possible
- $ pre-commit install # install a Git pre-commit hook (optional but recommended)
+ $ pre-commit install # install a Git pre-commit hook (strongly recommended)
-For a more comprehensive guide, check out the `Scikit-HEP documentation about
-code style `_.
+For a more comprehensive guide, check out the `learn.scientific-python.org
+documentation about code style
+`_.
Testing
-------
@@ -82,26 +99,9 @@ Testing
* The :mod:`pygama` test suite is available below ``tests/``. We use `pytest
`_ to run tests and analyze their output. As
a starting point to learn how to write good tests, reading of `the
- Scikit-HEP Intro to testing `_ is
- recommended. Refer to `pytest's how-to guides
- `_ for a complete
- overview.
-* :mod:`pygama` tests belong to three categories:
-
- :unit tests: Should ensure the correct behaviour of each function
- independently, possibly without relying on other :mod:`pygama` methods.
- The existence of these micro-tests makes it possible to promptly identify
- and fix the source of a bug. An example of this are tests for each single
- DSP processor
-
- :integration tests: Should ensure that independent parts of the code base
- work well together and are integrated in a cohesive framework. An example
- of this is testing whether :func:`moduleA.process_obj` is able to
- correctly handle :class:`moduleB.DataObj`
-
- :functional tests: High-level tests of realistic applications. An example is
- testing whether the processing of a real or synthetic data sample yields
- consistent output parameters
+ relevant learn.scientific-python.org webpage
+ `_ is
+ recommended.
* Unit tests are automatically run for every push event and pull request to the
remote Git repository on a remote server (currently handled by GitHub
@@ -125,127 +125,6 @@ Testing
$ pytest --cov=pygama
-Testing Numba-Wrapped Functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When using Numba to vectorize Python functions, the Python version of the function
-does not, by default, get directly tested, but the Numba version instead. In
-this case, we need to unwrap the Numba function and test the pure Python version.
-With various processors in :mod:`pygama.dsp.processors`, this means that testing
-and triggering the code coverage requires this unwrapping.
-
-Within the testing suite, we use the :func:`@pytest.fixture()`
-decorator to include a helper function called ``compare_numba_vs_python`` that
-can be used in any test. This function runs both the Numba and pure Python versions
-of a function, asserts that they are equal up to floating precision, and returns the
-output value.
-
-As an example, we show a snippet from the test for
-:func:`pygama.dsp.processors.fixed_time_pickoff`, a processor which uses the
-:func:`@numba.guvectorize()` decorator.
-
-.. code-block:: python
-
- def test_fixed_time_pickoff(compare_numba_vs_python):
- """Testing function for the fixed_time_pickoff processor."""
-
- len_wf = 20
-
- # test for nan if w_in has a nan
- w_in = np.ones(len_wf)
- w_in[4] = np.nan
- assert np.isnan(compare_numba_vs_python(fixed_time_pickoff, w_in, 1, ord("i")))
-
-In the assertion that the output is what we expect, we use
-``compare_numba_vs_python(fixed_time_pickoff, w_in, 1, ord("i"))`` in place of
-``fixed_time_pickoff(w_in, 1, ord("i"))``. In general, the replacement to make is
-``func(*inputs)`` becomes ``compare_numba_vs_python(func, *inputs)``.
-
-Note, that in cases of testing for the raising of errors, it is recommended
-to instead run the function twice: once with the Numba version, and once using the
-:func:`inspect.unwrap` function. We again show a snippet from the test for
-:func:`pygama.dsp.processors.fixed_time_pickoff` below. We include the various
-required imports in the snippet for verbosity.
-
-.. code-block:: python
-
- import inspect
-
- import numpy as np
- import pytest
-
- from pygama.dsp.errors import DSPFatal
- from pygama.dsp.processors import fixed_time_pickoff
-
- def test_fixed_time_pickoff(compare_numba_vs_python):
- "skipping parts of function..."
- # test for DSPFatal errors being raised
- # noninteger t_in with integer interpolation
- with pytest.raises(DSPFatal):
- w_in = np.ones(len_wf)
- fixed_time_pickoff(w_in, 1.5, ord("i"))
-
- with pytest.raises(DSPFatal):
- a_out = np.empty(len_wf)
- inspect.unwrap(fixed_time_pickoff)(w_in, 1.5, ord("i"), a_out)
-
-In this case, the general idea is to use :func:`pytest.raises` twice, once with
-``func(*inputs)``, and again with ``inspect.unwrap(func)(*inputs)``.
-
-Testing Factory Functions that Return Numba-Wrapped Functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-As in the previous section, we also have processors that are first initialized
-with a factory function, which then returns a callable Numba-wrapped function.
-In this case, there is a slightly different way of testing the function to ensure
-full code coverage when using ``compare_numba_vs_python``, as the function
-signature is generally different.
-
-As an example, we show a snippet from the test for
-:func:`pygama.dsp.processors.dwt.discrete_wavelet_transform`, a processor which uses
-a factory function to return a function wrapped by the
-:func:`@numba.guvectorize()` decorator.
-
-.. code-block:: python
-
- import numpy as np
- import pytest
-
- from pygama.dsp.errors import DSPFatal
- from pygama.dsp.processors import discrete_wavelet_transform
-
- def test_discrete_wavelet_transform(compare_numba_vs_python):
- """Testing function for the discrete_wavelet_transform processor."""
-
- # set up values to use for each test case
- len_wf_in = 16
- wave_type = 'haar'
- level = 2
- len_wf_out = 4
-
- # ensure the DSPFatal is raised for a negative level
- with pytest.raises(DSPFatal):
- discrete_wavelet_transform(wave_type, -1)
-
- # ensure that a valid input gives the expected output
- w_in = np.ones(len_wf_in)
- w_out = np.empty(len_wf_out)
- w_out_expected = np.ones(len_wf_out) * 2**(level / 2)
-
- dwt_func = discrete_wavelet_transform(wave_type, level)
- assert np.allclose(
- compare_numba_vs_python(dwt_func, w_in, w_out),
- w_out_expected,
- )
- ## rest of test function is truncated in this example
-
-In this case, the error is raised outside of the Numba-wrapped function, and
-we only need to test for the error once. For the comparison of the calculated
-values to expectation, we must initialize the output array and pass it to the
-list of inputs that should be used in the comparison. This is different than
-the previous section, where we are instead now updating the outputted values
-in place.
-
Documentation
-------------
@@ -267,7 +146,7 @@ following:
other) must be provided as separate pages in ``docs/source/`` and linked in
the table of contents.
* Jupyter notebooks should be added to the main Git repository below
- ``tutorials/``.
+ ``docs/source/notebooks``.
* Before submitting a pull request, contributors are required to build the
documentation locally and resolve and warnings or errors.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 3183b87ad..85da7d1e3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -70,5 +70,6 @@ Next steps
Source Code
License
+ Citation
Changelog
developer
diff --git a/pyproject.toml b/pyproject.toml
index 8f5058ee8..b4edbf1ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,19 +1,127 @@
[build-system]
requires = [
- "setuptools>=42.0.0",
- "setuptools_scm[toml]>=3.4",
+ "setuptools>=61.2",
+ "setuptools_scm[toml]>=7",
]
-
build-backend = "setuptools.build_meta"
+[project]
+name = "pygama"
+description = "Python package for data processing and analysis"
+authors = [
+ { name = "The LEGEND collaboration" },
+]
+maintainers = [
+ { name = "The LEGEND collaboration" },
+]
+readme = "README.md"
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+ "Operating System :: MacOS",
+ "Operating System :: POSIX",
+ "Operating System :: Unix",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3 :: Only",
+ "Topic :: Scientific/Engineering",
+]
+requires-python = ">=3.9"
+dependencies = [
+ "boost-histogram",
+ "colorlog",
+ "dspeed>=1.3",
+ "h5py>=3.2",
+ "iminuit",
+ "legend-daq2lh5>=1.2",
+ "legend-pydataobj>=1.5",
+ "matplotlib",
+ "numba!=0.53.*,!=0.54.*,!=0.57",
+ "numpy>=1.21",
+ "pandas>=1.4.4",
+ "pint",
+ "scikit-learn",
+ "scipy>=1.0.1",
+ "tables",
+ "tqdm>=4.27",
+]
+dynamic = [
+ "version",
+]
+
+[project.urls]
+Homepage = "https://github.com/legend-exp/pygama"
+"Bug Tracker" = "https://github.com/legend-exp/pygama/issues"
+Discussions = "https://github.com/legend-exp/pygama/discussions"
+Changelog = "https://github.com/legend-exp/pygama/releases"
+
[tool.setuptools_scm]
write_to = "src/pygama/_version.py"
+[project.optional-dependencies]
+all = [
+ "pygama[docs,test]",
+]
+docs = [
+ "furo",
+ "jupyter",
+ "myst-parser",
+ "nbsphinx",
+ "sphinx",
+ "sphinx-copybutton",
+ "sphinx-inline-tabs",
+]
+test = [
+ "pre-commit",
+ "pylegendtestdata",
+ "pytest>=6.0",
+ "pytest-cov",
+]
+
+[project.scripts]
+pygama = "pygama.cli:pygama_cli"
+
+[tool.setuptools]
+include-package-data = true
+zip-safe = false
+license-files = [
+ "LICENSE",
+]
+
+[tool.setuptools.package-dir]
+"" = "src"
+
+[tool.setuptools.packages.find]
+where = [
+ "src",
+]
+namespaces = false
+
+[tool.setuptools.package-data]
+"*" = [
+ "*.json",
+]
+
+[tool.coverage]
+run.source = ["pygama"]
+report.exclude_also = [
+ '\.\.\.',
+ 'if typing.TYPE_CHECKING:',
+]
+
+[tool.flake8]
+extend-ignore = "E203, E501"
+
+[tool.codespell]
+ignore-words-list = "hist, gaus, nd, ans, crate, nin, puls, spms, fom"
+
[tool.pytest.ini_options]
minversion = "6.0"
addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
xfail_strict = true
-filterwarnings = ["error", "ignore::DeprecationWarning"]
+filterwarnings = ["error", 'ignore:\nPyarrow:DeprecationWarning']
log_cli_level = "info"
testpaths = "tests"
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index fe26cf6db..000000000
--- a/setup.cfg
+++ /dev/null
@@ -1,87 +0,0 @@
-[metadata]
-name = pygama
-description = Python package for decoding and processing digitizer data
-long_description = file: README.md
-long_description_content_type = text/markdown
-url = https://github.com/legend-exp/pygama
-author = The LEGEND collaboration
-maintainer = The LEGEND collaboration
-license = GPL-3.0
-license_files = LICENSE
-classifiers =
- Development Status :: 4 - Beta
- Intended Audience :: Developers
- Intended Audience :: Information Technology
- Intended Audience :: Science/Research
- License :: OSI Approved :: GNU General Public License v3 (GPLv3)
- Operating System :: MacOS
- Operating System :: POSIX
- Operating System :: Unix
- Programming Language :: Python
- Programming Language :: Python :: 3
- Programming Language :: Python :: 3 :: Only
- Topic :: Scientific/Engineering
- Topic :: Scientific/Engineering :: Information Analysis
- Topic :: Scientific/Engineering :: Mathematics
- Topic :: Scientific/Engineering :: Physics
- Topic :: Software Development
-project_urls =
- Documentation = https://pygama.readthedocs.io
-
-[options]
-packages = find:
-install_requires =
- boost-histogram
- colorlog
- dspeed>=1.1
- h5py>=3.2
- iminuit
- legend-daq2lh5>=1.0
- legend-pydataobj>=1.2
- matplotlib
- numba!=0.53.*,!=0.54.*,!=0.57
- numpy>=1.21
- pandas>=1.4.4
- pint
- scikit-learn
- scipy>=1.0.1
- tables
- tqdm>=4.27
-python_requires = >=3.9
-include_package_data = True
-package_dir =
- = src
-zip_safe = False
-
-[options.packages.find]
-where = src
-
-[options.entry_points]
-console_scripts =
- pygama = pygama.cli:pygama_cli
-
-[options.extras_require]
-all =
- pygama[docs,test]
-docs =
- furo
- jupyter
- myst-parser
- nbsphinx
- sphinx
- sphinx-copybutton
- sphinx-inline-tabs
-test =
- pre-commit
- pylegendtestdata
- pytest>=6.0
- pytest-cov
-
-[options.package_data]
-* = *.json
-
-[flake8]
-extend-ignore = E203, E501
-
-[codespell]
-ignore-words-list = hist, gaus, nd, ans, crate, nin, puls, spms, fom
diff --git a/setup.py b/setup.py
deleted file mode 100644
index f6844919c..000000000
--- a/setup.py
+++ /dev/null
@@ -1,4 +0,0 @@
-import setuptools_scm # noqa: F401
-from setuptools import setup
-
-setup()
diff --git a/src/pygama/cli.py b/src/pygama/cli.py
index a6b59abaf..fb05ef658 100644
--- a/src/pygama/cli.py
+++ b/src/pygama/cli.py
@@ -80,7 +80,7 @@ def pygama_cli():
def add_lh5ls_parser(subparsers):
- """Configure :func:`.lgdo.lh5_store.show` command line interface."""
+ """Configure :func:`.lgdo.lh5.show` command line interface."""
parser_lh5ls = subparsers.add_parser(
"lh5ls", description="""Inspect LEGEND HDF5 (LH5) file contents"""
@@ -99,7 +99,7 @@ def add_lh5ls_parser(subparsers):
def lh5_show_cli(args):
- """Passes command line arguments to :func:`.lgdo.lh5_store.show`."""
+ """Passes command line arguments to :func:`.lgdo.lh5.show`."""
show(args.lh5_file, args.lh5_group, attrs=args.attributes)
diff --git a/src/pygama/evt/__init__.py b/src/pygama/evt/__init__.py
index 8257a98e3..80b544455 100644
--- a/src/pygama/evt/__init__.py
+++ b/src/pygama/evt/__init__.py
@@ -2,7 +2,8 @@
Utilities for grouping hit data into events.
"""
+from .build_evt import build_evt
from .build_tcm import build_tcm
from .tcm import generate_tcm_cols
-__all__ = ["build_tcm", "generate_tcm_cols"]
+__all__ = ["build_tcm", "generate_tcm_cols", "build_evt"]
diff --git a/src/pygama/evt/aggregators.py b/src/pygama/evt/aggregators.py
new file mode 100644
index 000000000..dbcae2829
--- /dev/null
+++ b/src/pygama/evt/aggregators.py
@@ -0,0 +1,689 @@
+"""
+This module provides aggregators to build the `evt` tier.
+"""
+
+from __future__ import annotations
+
+import awkward as ak
+import numpy as np
+from lgdo import Array, ArrayOfEqualSizedArrays, VectorOfVectors, lh5
+from lgdo.lh5 import LH5Store
+from numpy.typing import NDArray
+
+from . import utils
+
+
+def evaluate_to_first_or_last(
+ cumulength: NDArray,
+ idx: NDArray,
+ ids: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ chns: list,
+ chns_rm: list,
+ expr: str,
+ exprl: list,
+ qry: str | NDArray,
+ nrows: int,
+ sorter: tuple,
+ var_ph: dict = None,
+ defv: bool | int | float = np.nan,
+ is_first: bool = True,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> Array:
+ """Aggregates across channels by returning the expression of the channel
+ with value of `sorter`.
+
+ Parameters
+ ----------
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ chns
+ list of channels to be aggregated.
+ chns_rm
+ list of channels to be skipped from evaluation and set to default value.
+ expr
+ expression string to be evaluated.
+ exprl
+ list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
+ qry
+ query expression to mask aggregation.
+ nrows
+ length of output array.
+ sorter
+ tuple of field in `hit/dsp/evt` tier to evaluate ``(tier, field)``.
+ var_ph
+ dictionary of `evt` and additional parameters and their values.
+ defv
+ default value.
+ is_first
+        defines whether the channel with the smallest (``True``) or largest
+        (``False``) value of `sorter` is selected.
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ evt_group
+ LH5 root group in `evt` file.
+ """
+
+ # define dimension of output array
+ out = np.full(nrows, defv, dtype=type(defv))
+ outt = np.zeros(len(out))
+
+ store = LH5Store()
+
+ for ch in chns:
+ # get index list for this channel to be loaded
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+ evt_ids_ch = np.searchsorted(
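+        # map each flat tcm position of this channel to its event index:
+        # `cumulength` holds the cumulative lengths of the tcm vectors, so a
+        # right-sided searchsorted yields the event number of each hit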
+ cumulength,
+ np.where(ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0],
+ "right",
+ )
+
+ # evaluate at channel
+ res = utils.get_data_at_channel(
+ ch=ch,
+ ids=ids,
+ idx=idx,
+ expr=expr,
+ exprl=exprl,
+ var_ph=var_ph,
+ is_evaluated=ch not in chns_rm,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ # get mask from query
+ limarr = utils.get_mask_from_query(
+ qry=qry,
+ length=len(res),
+ ch=ch,
+ idx_ch=idx_ch,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ # find if sorter is in hit or dsp
+ t0 = store.read(
+ f"{ch}/{sorter[0]}/{sorter[1]}",
+ f_hit if f"{hit_group}" == sorter[0] else f_dsp,
+ idx=idx_ch,
+ )[0].view_as("np")
+
+ if t0.ndim > 1:
+ raise ValueError(f"sorter '{sorter[0]}/{sorter[1]}' must be a 1D array")
+
+ if is_first:
+ if ch == chns[0]:
+ outt[:] = np.inf
+
+ out[evt_ids_ch] = np.where(
+ (t0 < outt[evt_ids_ch]) & (limarr), res, out[evt_ids_ch]
+ )
+ outt[evt_ids_ch] = np.where(
+ (t0 < outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch]
+ )
+
+ else:
+ out[evt_ids_ch] = np.where(
+ (t0 > outt[evt_ids_ch]) & (limarr), res, out[evt_ids_ch]
+ )
+ outt[evt_ids_ch] = np.where(
+ (t0 > outt[evt_ids_ch]) & (limarr), t0, outt[evt_ids_ch]
+ )
+
+ return Array(nda=out, dtype=type(defv))
+
+
+def evaluate_to_scalar(
+ mode: str,
+ cumulength: NDArray,
+ idx: NDArray,
+ ids: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ chns: list,
+ chns_rm: list,
+ expr: str,
+ exprl: list,
+ qry: str | NDArray,
+ nrows: int,
+ var_ph: dict = None,
+ defv: bool | int | float = np.nan,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> Array:
+    """Aggregates across channels to a scalar value, according to `mode`
+    (``sum``, ``any`` or ``all``).
+
+ Parameters
+ ----------
+ mode
+ aggregation mode.
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ chns
+ list of channels to be aggregated.
+ chns_rm
+ list of channels to be skipped from evaluation and set to default value.
+ expr
+ expression string to be evaluated.
+ exprl
+ list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
+ qry
+ query expression to mask aggregation.
+ nrows
+ length of output array
+ var_ph
+ dictionary of `evt` and additional parameters and their values.
+ defv
+ default value.
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ evt_group
+ LH5 root group in `evt` file.
+ """
+
+ # define dimension of output array
+ out = np.full(nrows, defv, dtype=type(defv))
+
+ for ch in chns:
+ # get index list for this channel to be loaded
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+ evt_ids_ch = np.searchsorted(
+ cumulength,
+ np.where(ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0],
+ "right",
+ )
+
+ res = utils.get_data_at_channel(
+ ch=ch,
+ ids=ids,
+ idx=idx,
+ expr=expr,
+ exprl=exprl,
+ var_ph=var_ph,
+ is_evaluated=ch not in chns_rm,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ # get mask from query
+ limarr = utils.get_mask_from_query(
+ qry=qry,
+ length=len(res),
+ ch=ch,
+ idx_ch=idx_ch,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ # switch through modes
+ if "sum" == mode:
+ if res.dtype == bool:
+ res = res.astype(int)
+ out[evt_ids_ch] = np.where(limarr, res + out[evt_ids_ch], out[evt_ids_ch])
+ if "any" == mode:
+ if res.dtype != bool:
+ res = res.astype(bool)
+ out[evt_ids_ch] = out[evt_ids_ch] | (res & limarr)
+ if "all" == mode:
+ if res.dtype != bool:
+ res = res.astype(bool)
+ out[evt_ids_ch] = out[evt_ids_ch] & res & limarr
+
+ return Array(nda=out, dtype=type(defv))
+
+
+def evaluate_at_channel(
+ cumulength: NDArray,
+ idx: NDArray,
+ ids: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ chns_rm: list,
+ expr: str,
+ exprl: list,
+ ch_comp: Array,
+ var_ph: dict = None,
+ defv: bool | int | float = np.nan,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> Array:
+ """Aggregates by evaluating the expression at a given channel.
+
+ Parameters
+ ----------
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ chns_rm
+ list of channels to be skipped from evaluation and set to default value.
+ expr
+ expression string to be evaluated.
+ exprl
+ list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
+ ch_comp
+ array of rawids at which the expression is evaluated.
+ var_ph
+ dictionary of `evt` and additional parameters and their values.
+ defv
+ default value.
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ evt_group
+ LH5 root group in `evt` file.
+ """
+
+ out = np.full(len(ch_comp.nda), defv, dtype=type(defv))
+
+ for ch in np.unique(ch_comp.nda.astype(int)):
+ # skip default value
+ if utils.get_table_name_by_pattern(tcm_id_table_pattern, ch) not in lh5.ls(
+ f_hit
+ ):
+ continue
+ idx_ch = idx[ids == ch]
+ evt_ids_ch = np.searchsorted(cumulength, np.where(ids == ch)[0], "right")
+ res = utils.get_data_at_channel(
+ ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch),
+ ids=ids,
+ idx=idx,
+ expr=expr,
+ exprl=exprl,
+ var_ph=var_ph,
+ is_evaluated=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch)
+ not in chns_rm,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ out[evt_ids_ch] = np.where(ch == ch_comp.nda[idx_ch], res, out[evt_ids_ch])
+
+ return Array(nda=out, dtype=type(defv))
+
+
+def evaluate_at_channel_vov(
+ cumulength: NDArray,
+ idx: NDArray,
+ ids: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ expr: str,
+ exprl: list,
+ ch_comp: VectorOfVectors,
+ chns_rm: list,
+ var_ph: dict = None,
+ defv: bool | int | float = np.nan,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> VectorOfVectors:
+    """Same as :func:`evaluate_at_channel` but evaluates the expression at
+    non-flat channel lists given as a :class:`.VectorOfVectors`.
+
+ Parameters
+ ----------
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ expr
+ expression string to be evaluated.
+ exprl
+ list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
+ ch_comp
+ array of "rawid"s at which the expression is evaluated.
+ chns_rm
+ list of channels to be skipped from evaluation and set to default value.
+ var_ph
+ dictionary of `evt` and additional parameters and their values.
+ defv
+ default value.
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ evt_group
+ LH5 root group in `evt` file.
+ """
+
+    # start with one empty list per event; per-channel results are appended below
+ out = ak.Array([[] for _ in range(len(ch_comp))])
+
+ chns = np.unique(ch_comp.flattened_data.nda).astype(int)
+ ch_comp = ch_comp.view_as("ak")
+
+ type_name = None
+ for ch in chns:
+ evt_ids_ch = np.searchsorted(cumulength, np.where(ids == ch)[0], "right")
+ res = utils.get_data_at_channel(
+ ch=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch),
+ ids=ids,
+ idx=idx,
+ expr=expr,
+ exprl=exprl,
+ var_ph=var_ph,
+ is_evaluated=utils.get_table_name_by_pattern(tcm_id_table_pattern, ch)
+ not in chns_rm,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ # see in which events the current channel is present
+ mask = ak.to_numpy(ak.any(ch_comp == ch, axis=-1), allow_missing=False)
+ cv = np.full(len(ch_comp), np.nan)
+ cv[evt_ids_ch] = res
+ cv[~mask] = np.nan
+ cv = ak.drop_none(ak.nan_to_none(ak.Array(cv)[:, None]))
+
+ out = ak.concatenate((out, cv), axis=-1)
+
+ if ch == chns[0]:
+ type_name = res.dtype
+
+ return VectorOfVectors(ak.values_astype(out, type_name), dtype=type_name)
+
+
+def evaluate_to_aoesa(
+ cumulength: NDArray,
+ idx: NDArray,
+ ids: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ chns: list,
+ chns_rm: list,
+ expr: str,
+ exprl: list,
+ qry: str | NDArray,
+ nrows: int,
+ var_ph: dict = None,
+ defv: bool | int | float = np.nan,
+ missv=np.nan,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> ArrayOfEqualSizedArrays:
+ """Aggregates by returning an :class:`.ArrayOfEqualSizedArrays` of evaluated
+ expressions of channels that fulfill a query expression.
+
+ Parameters
+ ----------
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ chns
+ list of channels to be aggregated.
+ chns_rm
+ list of channels to be skipped from evaluation and set to default value.
+ expr
+ expression string to be evaluated.
+ exprl
+ list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
+ qry
+ query expression to mask aggregation.
+ nrows
+ length of output :class:`.VectorOfVectors`.
+ ch_comp
+ array of "rawid"s at which the expression is evaluated.
+ var_ph
+ dictionary of `evt` and additional parameters and their values.
+ defv
+ default value.
+ missv
+ missing value.
+ sorter
+ sorts the entries in the vector according to sorter expression.
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ evt_group
+ LH5 root group in `evt` file.
+ """
+ # define dimension of output array
+ out = np.full((nrows, len(chns)), missv)
+
+ i = 0
+ for ch in chns:
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+ evt_ids_ch = np.searchsorted(
+ cumulength,
+ np.where(ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0],
+ "right",
+ )
+ res = utils.get_data_at_channel(
+ ch=ch,
+ ids=ids,
+ idx=idx,
+ expr=expr,
+ exprl=exprl,
+ var_ph=var_ph,
+ is_evaluated=ch not in chns_rm,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ # get mask from query
+ limarr = utils.get_mask_from_query(
+ qry=qry,
+ length=len(res),
+ ch=ch,
+ idx_ch=idx_ch,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ out[evt_ids_ch, i] = np.where(limarr, res, out[evt_ids_ch, i])
+
+ i += 1
+
+ return ArrayOfEqualSizedArrays(nda=out)
+
+
+def evaluate_to_vector(
+ cumulength: NDArray,
+ idx: NDArray,
+ ids: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ chns: list,
+ chns_rm: list,
+ expr: str,
+ exprl: list,
+ qry: str | NDArray,
+ nrows: int,
+ var_ph: dict = None,
+ defv: bool | int | float = np.nan,
+ sorter: str = None,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> VectorOfVectors:
+    """Aggregates by returning a :class:`.VectorOfVectors` of evaluated
+ expressions of channels that fulfill a query expression.
+
+ Parameters
+ ----------
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ chns
+ list of channels to be aggregated.
+ chns_rm
+ list of channels to be skipped from evaluation and set to default value.
+ expr
+ expression string to be evaluated.
+ exprl
+ list of `dsp/hit/evt` parameter tuples in expression ``(tier, field)``.
+ qry
+ query expression to mask aggregation.
+ nrows
+ length of output :class:`.VectorOfVectors`.
+ ch_comp
+ array of "rawids" at which the expression is evaluated.
+ var_ph
+ dictionary of `evt` and additional parameters and their values.
+ defv
+ default value.
+ sorter
+ sorts the entries in the vector according to sorter expression.
+        ``ascend_by:`` results in a vector ordered ascending, ``descend_by:``
+        sorts descending (e.g. ``"ascend_by:dsp.tp_0_est"``).
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ evt_group
+ LH5 root group in `evt` file.
+ """
+ out = evaluate_to_aoesa(
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns=chns,
+ chns_rm=chns_rm,
+ expr=expr,
+ exprl=exprl,
+ qry=qry,
+ nrows=nrows,
+ var_ph=var_ph,
+ defv=defv,
+ missv=np.nan,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ ).view_as("np")
+
+ # if a sorter is given sort accordingly
+ if sorter is not None:
+ md, fld = sorter.split(":")
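+        # e.g. sorter = "ascend_by:dsp.tp_0_est" -> md = "ascend_by", fld = "dsp.tp_0_est"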
+ s_val = evaluate_to_aoesa(
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns=chns,
+ chns_rm=chns_rm,
+ expr=fld,
+ exprl=[tuple(fld.split("."))],
+ qry=None,
+ nrows=nrows,
+ missv=np.nan,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ ).view_as("np")
+ if "ascend_by" == md:
+ out = out[np.arange(len(out))[:, None], np.argsort(s_val)]
+
+ elif "descend_by" == md:
+ out = out[np.arange(len(out))[:, None], np.argsort(-s_val)]
+ else:
+ raise ValueError(
+ "sorter values can only have 'ascend_by' or 'descend_by' prefixes"
+ )
+
+ return VectorOfVectors(
+ ak.values_astype(ak.drop_none(ak.nan_to_none(ak.Array(out))), type(defv)),
+ dtype=type(defv),
+ )
diff --git a/src/pygama/evt/build_evt.py b/src/pygama/evt/build_evt.py
new file mode 100644
index 000000000..5f7949bdb
--- /dev/null
+++ b/src/pygama/evt/build_evt.py
@@ -0,0 +1,589 @@
+"""
+This module implements routines to build the `evt` tier.
+"""
+
+from __future__ import annotations
+
+import itertools
+import json
+import logging
+import re
+from importlib import import_module
+
+import awkward as ak
+import numpy as np
+from lgdo import Array, ArrayOfEqualSizedArrays, Table, VectorOfVectors, lh5
+from lgdo.lh5 import LH5Store
+
+from . import aggregators, utils
+
+log = logging.getLogger(__name__)
+
+
+def build_evt(
+ f_tcm: str,
+ f_dsp: str,
+ f_hit: str,
+ evt_config: str | dict,
+ f_evt: str | None = None,
+ wo_mode: str = "write_safe",
+ evt_group: str = "evt",
+ tcm_group: str = "hardware_tcm_1",
+ dsp_group: str = "dsp",
+ hit_group: str = "hit",
+ tcm_id_table_pattern: str = "ch{}",
+) -> None | Table:
+    """Transform channel-sorted data from the `hit` and `dsp` levels to an
+    event-sorted data format.
+
+ Parameters
+ ----------
+ f_tcm
+ input LH5 file of the `tcm` level.
+ f_dsp
+ input LH5 file of the `dsp` level.
+ f_hit
+ input LH5 file of the `hit` level.
+ evt_config
+ name of configuration file or dictionary defining event fields. Channel
+ lists can be defined by importing a metadata module.
+
+        - ``operations`` defines the fields ``name=key``, where ``channels``
+          specifies the channels used for this field (either a string or a
+          list of strings),
+ - ``aggregation_mode`` defines how the channels should be combined (see
+ :func:`evaluate_expression`).
+        - ``expression`` defines the mathematical/special function to apply
+          (see :func:`evaluate_expression`),
+ - ``query`` defines an expression to mask the aggregation.
+ - ``parameters`` defines any other parameter used in expression.
+
+ For example:
+
+ .. code-block:: json
+
+ {
+ "channels": {
+ "geds_on": ["ch1084803", "ch1084804", "ch1121600"],
+ "spms_on": ["ch1057600", "ch1059201", "ch1062405"],
+                "muon": "ch1027202"
+ },
+ "operations": {
+ "energy_id":{
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal > 25",
+ "expression": "tcm.array_id",
+ "sort": "ascend_by:dsp.tp_0_est"
+ },
+ "energy":{
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "hit.cuspEmax_ctc_cal > 25"
+            },
+ "is_muon_rejected":{
+ "channels": "muon",
+ "aggregation_mode": "any",
+ "expression": "dsp.wf_max>a",
+ "parameters": {"a":15100},
+ "initial": false
+ },
+ "multiplicity":{
+ "channels": ["geds_on", "geds_no_psd", "geds_ac"],
+ "aggregation_mode": "sum",
+ "expression": "hit.cuspEmax_ctc_cal > a",
+ "parameters": {"a":25},
+ "initial": 0
+ },
+ "t0":{
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "dsp.tp_0_est"
+ },
+ "lar_energy":{
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_energy(0.5, evt.t0, 48000, 1000, 5000)"
+            }
+ }
+ }
+
+ f_evt
+ name of the output file. If ``None``, return the output :class:`.Table`
+ instead of writing to disk.
+ wo_mode
+ writing mode.
+    evt_group
+ LH5 root group name of `evt` tier.
+ tcm_group
+ LH5 root group in `tcm` file.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ tcm_id_table_pattern
+ pattern to format `tcm` id values to table name in higher tiers. Must
+ have one placeholder which is the `tcm` id.
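+
+    Examples
+    --------
+    A minimal sketch of a call (the file names below are purely illustrative):
+
+    .. code-block:: python
+
+        from pygama.evt import build_evt
+
+        build_evt(
+            f_tcm="data-tier_tcm.lh5",
+            f_dsp="data-tier_dsp.lh5",
+            f_hit="data-tier_hit.lh5",
+            evt_config="evt_config.json",
+            f_evt="data-tier_evt.lh5",
+        )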
+ """
+
+ store = LH5Store()
+ tbl_cfg = evt_config
+ if not isinstance(tbl_cfg, (str, dict)):
+ raise TypeError()
+ if isinstance(tbl_cfg, str):
+ with open(tbl_cfg) as f:
+ tbl_cfg = json.load(f)
+
+ if "channels" not in tbl_cfg.keys():
+ raise ValueError("channel field needs to be specified in the config")
+ if "operations" not in tbl_cfg.keys():
+ raise ValueError("operations field needs to be specified in the config")
+
+ # check tcm_id_table_pattern validity
+ pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)
+ if len(pattern_check) != 1:
+ raise ValueError(
+ f"tcm_id_table_pattern must have exactly one placeholder. {tcm_id_table_pattern} is invalid."
+ )
+ elif "{" in pattern_check[0] or "}" in pattern_check[0]:
+ raise ValueError(
+ f"tcm_id_table_pattern {tcm_id_table_pattern} has an invalid placeholder."
+ )
+
+ if (
+ utils.get_table_name_by_pattern(
+ tcm_id_table_pattern,
+ utils.get_tcm_id_by_pattern(tcm_id_table_pattern, lh5.ls(f_hit)[0]),
+ )
+ != lh5.ls(f_hit)[0]
+ ):
+ raise ValueError(
+ f"tcm_id_table_pattern {tcm_id_table_pattern} does not match keys in data!"
+ )
+
+ # create channel list according to config
+ # This can be either read from the meta data
+ # or a list of channel names
+ log.debug("Creating channel dictionary")
+
+ chns = {}
+
+ for k, v in tbl_cfg["channels"].items():
+ if isinstance(v, dict):
+            # it is a metadata module; the "module" key must exist
+            if "module" not in v.keys():
+                raise ValueError(
+                    "the 'module' key is required to load channels via a metadata module"
+                )
+
+ attr = {}
+ # the time_key argument is set to the time key of the DSP file
+ # in case it is not provided by the config
+ if "time_key" not in v.keys():
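+                # the regex below extracts an ISO-like timestamp from the dsp
+                # file name, e.g. "20230318T012144Z" (file naming assumed here)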
+ attr["time_key"] = re.search(r"\d{8}T\d{6}Z", f_dsp).group(0)
+
+            # the literal string "None" in the config means no time key
+ elif "None" == v["time_key"]:
+ attr["time_key"] = None
+
+ # load module
+ p, m = v["module"].rsplit(".", 1)
+ met = getattr(import_module(p, package=__package__), m)
+ chns[k] = met(v | attr)
+
+ elif isinstance(v, str):
+ chns[k] = [v]
+
+ elif isinstance(v, list):
+ chns[k] = [e for e in v]
+
+ nrows = store.read_n_rows(f"/{tcm_group}/cumulative_length", f_tcm)
+
+ table = Table(size=nrows)
+
+ for k, v in tbl_cfg["operations"].items():
+ log.debug("Processing field " + k)
+
+ # if mode not defined in operation, it can only be an operation on the evt level.
+ if "aggregation_mode" not in v.keys():
+ var = {}
+ if "parameters" in v.keys():
+ var = var | v["parameters"]
+ res = table.eval(v["expression"].replace(f"{evt_group}.", ""), var)
+
+ # add attribute if present
+ if "lgdo_attrs" in v.keys():
+ res.attrs |= v["lgdo_attrs"]
+
+ table.add_field(k, res)
+
+ # Else we build the event entry
+ else:
+ if "channels" not in v.keys():
+ chns_e = []
+ elif isinstance(v["channels"], str):
+ chns_e = chns[v["channels"]]
+ elif isinstance(v["channels"], list):
+ chns_e = list(
+ itertools.chain.from_iterable([chns[e] for e in v["channels"]])
+ )
+ chns_rm = []
+ if "exclude_channels" in v.keys():
+ if isinstance(v["exclude_channels"], str):
+ chns_rm = chns[v["exclude_channels"]]
+ elif isinstance(v["exclude_channels"], list):
+ chns_rm = list(
+ itertools.chain.from_iterable(
+ [chns[e] for e in v["exclude_channels"]]
+ )
+ )
+
+ pars, qry, defaultv, srter = None, None, np.nan, None
+ if "parameters" in v.keys():
+ pars = v["parameters"]
+ if "query" in v.keys():
+ qry = v["query"]
+ if "initial" in v.keys():
+ defaultv = v["initial"]
+ if isinstance(defaultv, str) and (
+ defaultv in ["np.nan", "np.inf", "-np.inf"]
+ ):
+ defaultv = eval(defaultv)
+ if "sort" in v.keys():
+ srter = v["sort"]
+
+ obj = evaluate_expression(
+ f_tcm=f_tcm,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns=chns_e,
+ chns_rm=chns_rm,
+ mode=v["aggregation_mode"],
+ expr=v["expression"],
+ nrows=nrows,
+ table=table,
+ para=pars,
+ qry=qry,
+ defv=defaultv,
+ sorter=srter,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ tcm_group=tcm_group,
+ )
+
+ # add attribute if present
+ if "lgdo_attrs" in v.keys():
+ obj.attrs |= v["lgdo_attrs"]
+
+ table.add_field(k, obj)
+
+ # write output fields into f_evt
+ if "outputs" in tbl_cfg.keys():
+ if len(tbl_cfg["outputs"]) < 1:
+ log.warning("No output fields specified, no file will be written.")
+ return table
+ else:
+ clms_to_remove = [e for e in table.keys() if e not in tbl_cfg["outputs"]]
+ for fld in clms_to_remove:
+ table.remove_field(fld, True)
+
+ if f_evt:
+ store.write(
+ obj=table, name=f"/{evt_group}/", lh5_file=f_evt, wo_mode=wo_mode
+ )
+ else:
+ return table
+ else:
+ log.warning("No output fields specified, no file will be written.")
+
+ key = re.search(r"\d{8}T\d{6}Z", f_hit).group(0)
+ log.info(
+ f"Applied {len(tbl_cfg['operations'])} operations to key {key} and saved "
+ f"{len(tbl_cfg['outputs'])} evt fields across {len(chns)} channel groups"
+ )
+
+
+def evaluate_expression(
+ f_tcm: str,
+ f_hit: str,
+ f_dsp: str,
+ chns: list,
+ chns_rm: list,
+ mode: str,
+ expr: str,
+ nrows: int,
+ table: Table = None,
+ para: dict = None,
+ qry: str = None,
+ defv: bool | int | float = np.nan,
+ sorter: str = None,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+ tcm_group: str = "tcm",
+) -> Array | ArrayOfEqualSizedArrays | VectorOfVectors:
+ """Evaluates the expression defined by the user across all channels
+ according to the mode.
+
+ Parameters
+ ----------
+ f_tcm
+ path to `tcm` tier file.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ chns
+ list of channel names across which expression gets evaluated (form:
+ ``ch``).
+ chns_rm
+ list of channels which get set to default value during evaluation. In
+ function mode they are removed entirely (form: ``ch``)
+ mode
+ The mode determines how the event entry is calculated across channels.
+ Options are:
+
+        - ``first_at:sorter``: aggregates across channels by returning the
+          expression of the channel with the smallest value of `sorter`.
+        - ``last_at:sorter``: same as above, but the channel with the largest
+          value of `sorter` is selected.
+ - ``sum``: aggregates by summation.
+ - ``any``: aggregates by logical or.
+ - ``all``: aggregates by logical and.
+ - ``keep_at_ch:ch_field``: aggregates according to passed ch_field.
+ - ``keep_at_idx:tcm_idx_field``: aggregates according to passed tcm
+ index field.
+        - ``gather``: channels are not combined; the result is returned as a
+          :class:`.VectorOfVectors`.
+
+ qry
+ a query that can mask the aggregation.
+ expr
+        the expression to evaluate; can be any mathematical equation or
+        comparison. If `mode` is ``function``, the expression needs to be a
+        special processing function defined in modules (e.g.
+        :func:`.modules.spm.get_energy`). The expression can use parameters
+        from the `hit`, `dsp` or `evt` tier (the latter only from operations
+        performed before this one, since the order of operations in the
+        configuration dictionary matters) or from the ``parameters`` field.
+ nrows
+ number of rows to be processed.
+ table
+ table of `evt` tier data.
+ para
+ dictionary of parameters defined in the ``parameters`` field in the
+ configuration dictionary.
+ defv
+ default value of evaluation.
+ sorter
+ can be used to sort vector outputs according to sorter expression (see
+ :func:`evaluate_to_vector`).
+ tcm_id_table_pattern
+ pattern to format tcm id values to table name in higher tiers. Must have one
+ placeholder which is the `tcm` id.
+    evt_group
+ LH5 root group name of `evt` tier.
+ tcm_group
+ LH5 root group in `tcm` file.
+ dsp_group
+ LH5 root group in `dsp` file.
+ hit_group
+ LH5 root group in `hit` file.
+ """
+
+ store = LH5Store()
+
+ # find parameters in evt file or in parameters
+ exprl = re.findall(
+ rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)", expr
+ )
+ var_ph = {}
+ if table:
+ var_ph = var_ph | {
+ e: table[e].view_as("ak")
+ for e in table.keys()
+ if isinstance(table[e], (Array, ArrayOfEqualSizedArrays, VectorOfVectors))
+ }
+ if para:
+ var_ph = var_ph | para
+
+ if mode == "function":
+ # evaluate expression
+ func, params = expr.split("(")
+ params = (
+ params.replace(f"{dsp_group}.", f"{dsp_group}_")
+ .replace(f"{hit_group}.", f"{hit_group}_")
+ .replace(f"{evt_group}.", "")
+ )
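+        # module functions receive the file paths, group names, id pattern and
+        # channel list first, followed by the user-supplied arguments parsed
+        # from the expression string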
+ params = [
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ [x for x in chns if x not in chns_rm],
+ ] + [utils.num_and_pars(e, var_ph) for e in params[:-1].split(",")]
+
+ # load function dynamically
+ p, m = func.rsplit(".", 1)
+ met = getattr(import_module(p, package=__package__), m)
+ return met(*params)
+
+ else:
+ # check if query is either on channel basis or evt basis (and not a mix)
+ qry_mask = qry
+ if qry is not None:
+ if f"{evt_group}." in qry and (
+ f"{hit_group}." in qry or f"{dsp_group}." in qry
+ ):
+ raise ValueError(
+ f"Query can't be a mix of {evt_group} tier and lower tiers."
+ )
+
+ # if it is an evt query we can evaluate it directly here
+ if table and f"{evt_group}." in qry:
+ qry_mask = eval(qry.replace(f"{evt_group}.", ""), table)
+
+ # load TCM data to define an event
+ ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np")
+ idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np")
+ cumulength = store.read(f"/{tcm_group}/cumulative_length", f_tcm)[0].view_as(
+ "np"
+ )
+
+ # switch through modes
+ if table and (("keep_at_ch:" == mode[:11]) or ("keep_at_idx:" == mode[:12])):
+ if "keep_at_ch:" == mode[:11]:
+ ch_comp = table[mode[11:].replace(f"{evt_group}.", "")]
+ else:
+ ch_comp = table[mode[12:].replace(f"{evt_group}.", "")]
+ if isinstance(ch_comp, Array):
+ ch_comp = Array(nda=ids[ch_comp.view_as("np")])
+ elif isinstance(ch_comp, VectorOfVectors):
+ ch_comp = ch_comp.view_as("ak")
+ ch_comp = VectorOfVectors(
+ array=ak.unflatten(
+ ids[ak.flatten(ch_comp)], ak.count(ch_comp, axis=-1)
+ )
+ )
+ else:
+ raise NotImplementedError(
+                    str(type(ch_comp))
+ + " not supported (only Array and VectorOfVectors are supported)"
+ )
+
+ if isinstance(ch_comp, Array):
+ return aggregators.evaluate_at_channel(
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns_rm=chns_rm,
+ expr=expr,
+ exprl=exprl,
+ ch_comp=ch_comp,
+ var_ph=var_ph,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+ elif isinstance(ch_comp, VectorOfVectors):
+ return aggregators.evaluate_at_channel_vov(
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ expr=expr,
+ exprl=exprl,
+ ch_comp=ch_comp,
+ chns_rm=chns_rm,
+ var_ph=var_ph,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+ else:
+                raise NotImplementedError(
+                    f"{type(ch_comp)} not supported "
+                    "(only Array and VectorOfVectors are supported)"
+                )
+ elif "first_at:" in mode or "last_at:" in mode:
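+            # e.g. mode = "first_at:dsp.tp_0_est" (illustrative field) yields
+            # sorter = ("dsp", "tp_0_est")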
+ sorter = tuple(
+ re.findall(
+ rf"({evt_group}|{hit_group}|{dsp_group}).([a-zA-Z_$][\w$]*)",
+ mode.split("first_at:")[-1],
+ )[0]
+ )
+ return aggregators.evaluate_to_first_or_last(
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns=chns,
+ chns_rm=chns_rm,
+ expr=expr,
+ exprl=exprl,
+ qry=qry_mask,
+ nrows=nrows,
+ sorter=sorter,
+ var_ph=var_ph,
+ defv=defv,
+ is_first=True if "first_at:" in mode else False,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+ elif mode in ["sum", "any", "all"]:
+ return aggregators.evaluate_to_scalar(
+ mode=mode,
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns=chns,
+ chns_rm=chns_rm,
+ expr=expr,
+ exprl=exprl,
+ qry=qry_mask,
+ nrows=nrows,
+ var_ph=var_ph,
+ defv=defv,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+ elif "gather" == mode:
+ return aggregators.evaluate_to_vector(
+ cumulength=cumulength,
+ idx=idx,
+ ids=ids,
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ chns=chns,
+ chns_rm=chns_rm,
+ expr=expr,
+ exprl=exprl,
+ qry=qry_mask,
+ nrows=nrows,
+ var_ph=var_ph,
+ defv=defv,
+ sorter=sorter,
+ tcm_id_table_pattern=tcm_id_table_pattern,
+ evt_group=evt_group,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+ else:
+ raise ValueError(mode + " not a valid mode")
diff --git a/src/pygama/evt/build_tcm.py b/src/pygama/evt/build_tcm.py
index 7bb0bbef3..05c7638c4 100644
--- a/src/pygama/evt/build_tcm.py
+++ b/src/pygama/evt/build_tcm.py
@@ -2,7 +2,8 @@
import re
-import lgdo as lgdo
+import lgdo
+from lgdo import lh5
from . import tcm as ptcm
@@ -49,7 +50,7 @@ def build_tcm(
out_name
name for the TCM table in the output file.
wo_mode
- mode to send to :meth:`~.lgdo.lh5_store.LH5Store.write_object`.
+ mode to send to :meth:`~.lgdo.lh5.LH5Store.write`.
See Also
--------
@@ -57,7 +58,7 @@ def build_tcm(
"""
# hash_func: later can add list or dict or a function(str) --> int.
- store = lgdo.LH5Store()
+ store = lh5.LH5Store()
coin_data = []
array_ids = []
all_tables = []
@@ -65,7 +66,7 @@ def build_tcm(
if isinstance(patterns, str):
patterns = [patterns]
for pattern in patterns:
- tables = lgdo.ls(filename, lh5_group=pattern)
+ tables = lh5.ls(filename, lh5_group=pattern)
for table in tables:
all_tables.append(table)
array_id = len(array_ids)
@@ -79,7 +80,7 @@ def build_tcm(
else:
array_id = len(all_tables) - 1
table = table + "/" + coin_col
- coin_data.append(store.read_object(table, filename)[0].nda)
+ coin_data.append(store.read(table, filename)[0].nda)
array_ids.append(array_id)
tcm_cols = ptcm.generate_tcm_cols(
@@ -94,6 +95,6 @@ def build_tcm(
)
if out_file is not None:
- store.write_object(tcm, out_name, out_file, wo_mode=wo_mode)
+ store.write(tcm, out_name, out_file, wo_mode=wo_mode)
return tcm
diff --git a/src/pygama/evt/modules/__init__.py b/src/pygama/evt/modules/__init__.py
new file mode 100644
index 000000000..bd80462f8
--- /dev/null
+++ b/src/pygama/evt/modules/__init__.py
@@ -0,0 +1,21 @@
+"""
+Contains submodules for evt processing
+"""
+
+from .spm import (
+ get_energy,
+ get_energy_dplms,
+ get_etc,
+ get_majority,
+ get_majority_dplms,
+ get_time_shift,
+)
+
+__all__ = [
+ "get_energy",
+ "get_majority",
+ "get_energy_dplms",
+ "get_majority_dplms",
+ "get_etc",
+ "get_time_shift",
+]
diff --git a/src/pygama/evt/modules/legend.py b/src/pygama/evt/modules/legend.py
new file mode 100644
index 000000000..2ee2d7e8e
--- /dev/null
+++ b/src/pygama/evt/modules/legend.py
@@ -0,0 +1,35 @@
+"""
+This module provides LEGEND-internal functions.
+"""
+from importlib import import_module
+
+from lgdo.lh5 import utils
+
+
+def metadata(params: dict) -> list:
+    # import the LEGEND metadata package only when it is actually needed:
+    # LEGEND collaborators can use the meta keyword, while users without access
+    # to the LEGEND metadata can still use the rest of the package
+ lm = import_module("legendmeta")
+ lmeta = lm.LegendMetadata(path=utils.expand_path(params["meta_path"]))
+ chmap = lmeta.channelmap(params["time_key"])
+
+ tmp = [
+ f"ch{e}"
+ for e in chmap.map("daq.rawid")
+ if chmap.map("daq.rawid")[e]["system"] == params["system"]
+ ]
+
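+    # For illustration (selector keys are hypothetical): passing
+    # params = {..., "selectors": {"analysis.usability": "on"}} keeps only
+    # channels whose channel-map entry satisfies
+    # chmap[rawid]["analysis"]["usability"] == "on"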
+ if "selectors" in params.keys():
+ for k in params["selectors"].keys():
+ s = ""
+ for e in k.split("."):
+ s += f"['{e}']"
+
+ tmp = [
+ e
+ for e in tmp
+ if eval("dotter" + s, {"dotter": chmap.map("daq.rawid")[int(e[2:])]})
+ == params["selectors"][k]
+ ]
+ return tmp
diff --git a/src/pygama/evt/modules/spm.py b/src/pygama/evt/modules/spm.py
new file mode 100644
index 000000000..6e7140d17
--- /dev/null
+++ b/src/pygama/evt/modules/spm.py
@@ -0,0 +1,527 @@
+"""
+Module for special event level routines for SiPMs
+
+functions must take as the first 8 args in order:
+- path to the hit file
+- path to the dsp file
+- path to the tcm file
+- LH5 root group of the hit tier
+- LH5 root group of the dsp tier
+- LH5 root group of the tcm tier
+- pattern to format tcm id values into table names
+- list of channels processed
+additional parameters are free to the user and need to be defined in the configuration
+"""
+
+import awkward as ak
+import numpy as np
+from lgdo import Array, VectorOfVectors
+from lgdo.lh5 import LH5Store
+
+from pygama.evt import utils
+
+
+# cast the trigger input (scalar, Array or VectorOfVectors) to an awkward array
+# holding one trigger time per event
+def cast_trigger(
+    trgr,
+    tdefault: float,
+    length: int = None,
+) -> ak.Array:
+ if isinstance(trgr, Array):
+ return ak.fill_none(ak.nan_to_none(trgr.view_as("ak")), tdefault)
+
+ elif isinstance(trgr, (VectorOfVectors)):
+ return ak.fill_none(
+ ak.min(ak.fill_none(trgr.view_as("ak"), tdefault), axis=-1), tdefault
+ )
+
+ elif isinstance(trgr, (ak.Array, ak.highlevel.Array)):
+ if trgr.ndim == 1:
+ return ak.fill_none(ak.nan_to_none(trgr), tdefault)
+ elif trgr.ndim == 2:
+ return ak.fill_none(
+ ak.min(ak.fill_none(ak.nan_to_none(trgr), tdefault), axis=-1), tdefault
+ )
+ else:
+ raise ValueError(f"Too many dimensions: {trgr.ndim}")
+ elif isinstance(trgr, (float, int)) and isinstance(length, int):
+ return ak.Array([trgr] * length)
+ else:
+ raise ValueError(f"Can't deal with t0 of type {type(trgr)}")
+
+
+# get SiPM coincidence window mask
+def get_spm_mask(
+ lim: float, trgr: ak.Array, tmin: float, tmax: float, pe: ak.Array, times: ak.Array
+) -> ak.Array:
+ if trgr.ndim != 1:
+        raise ValueError("trigger array must be 1-dimensional!")
+ if (len(trgr) != len(pe)) or (len(trgr) != len(times)):
+ raise ValueError(
+            "All arrays must have the same length along the first axis: "
+            f"len(pe)={len(pe)}, len(times)={len(times)}, len(trgr)={len(trgr)}"
+ )
+
+ tmi = trgr - tmin
+ tma = trgr + tmax
+
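+    # trigger_pos is stored in waveform samples; the factor 16 assumes a 16 ns
+    # sampling period, so the comparison with tmi/tma happens in ns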
+ mask = (
+ ((times * 16.0) < tma[:, None]) & ((times * 16.0) > tmi[:, None]) & (pe > lim)
+ )
+ return mask
+
+
+# get LAr indices according to mask per event over all channels
+# mode 0 -> return pulse indices
+# mode 1 -> return tcm indices
+# mode 2 -> return rawids
+# mode 3 -> return tcm_idx
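+# e.g. (illustrative) mode=2 with chs=["ch1057600"] returns, per event, the rawid
+# of the channel for every SiPM pulse passing the coincidence mask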
+def get_masked_tcm_idx(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ mode=0,
+) -> VectorOfVectors:
+ # load TCM data to define an event
+ store = LH5Store()
+ ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np")
+ idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np")
+
+ arr_lst = []
+
+ if isinstance(trgr, (float, int)):
+ tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1)
+ else:
+ tge = cast_trigger(trgr, tdefault, length=None)
+
+ for ch in chs:
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+
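+        # read this channel's hit data and pad it to one row per event: rows for
+        # events without this channel stay NaN and are dropped after the awkward cast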
+ pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as(
+ "np"
+ )
+ tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan)
+ tmp[idx_ch] = pe
+ pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp)))
+
+ # times are in sample units
+ times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[
+ 0
+ ].view_as("np")
+ tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan)
+ tmp[idx_ch] = times
+ times = ak.drop_none(ak.nan_to_none(ak.Array(tmp)))
+
+ mask = get_spm_mask(lim, tge, tmin, tmax, pe, times)
+
+ if mode == 0:
+ out_idx = ak.local_index(mask)[mask]
+
+ elif mode == 1:
+ out_idx = np.full((np.max(idx) + 1), np.nan)
+ out_idx[idx_ch] = np.where(
+ ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)
+ )[0]
+ out_idx = ak.drop_none(ak.nan_to_none(ak.Array(out_idx)[:, None]))
+ out_idx = out_idx[mask[mask] - 1]
+
+ elif mode == 2:
+ out_idx = ak.Array(
+ [utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)] * len(mask)
+ )
+ out_idx = out_idx[:, None][mask[mask] - 1]
+
+ elif mode == 3:
+ out_idx = np.full((np.max(idx) + 1), np.nan)
+ out_idx[idx_ch] = idx_ch
+ out_idx = ak.drop_none(ak.nan_to_none(ak.Array(out_idx)[:, None]))
+ out_idx = out_idx[mask[mask] - 1]
+
+ else:
+ raise ValueError("Unknown mode")
+
+ arr_lst.append(out_idx)
+
+ return VectorOfVectors(array=ak.concatenate(arr_lst, axis=-1))
+
+
+def get_spm_ene_or_maj(
+ f_hit,
+ f_tcm,
+ hit_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ mode,
+):
+ if mode not in ["energy_hc", "energy_dplms", "majority_hc", "majority_dplms"]:
+ raise ValueError("Unknown mode")
+
+ # load TCM data to define an event
+ store = LH5Store()
+ ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np")
+ idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np")
+ out = np.zeros(np.max(idx) + 1)
+
+ if isinstance(trgr, (float, int)):
+ tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1)
+ else:
+ tge = cast_trigger(trgr, tdefault, length=None)
+
+ for ch in chs:
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+
+ if mode in ["energy_dplms", "majority_dplms"]:
+ pe = ak.drop_none(
+ ak.nan_to_none(
+ store.read(
+ f"{ch}/{hit_group}/energy_in_pe_dplms", f_hit, idx=idx_ch
+ )[0].view_as("ak")
+ )
+ )
+
+ # times are in sample units
+ times = ak.drop_none(
+ ak.nan_to_none(
+ store.read(
+ f"{ch}/{hit_group}/trigger_pos_dplms", f_hit, idx=idx_ch
+ )[0].view_as("ak")
+ )
+ )
+
+ else:
+ pe = ak.drop_none(
+ ak.nan_to_none(
+ store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[
+ 0
+ ].view_as("ak")
+ )
+ )
+
+ # times are in sample units
+ times = ak.drop_none(
+ ak.nan_to_none(
+ store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[
+ 0
+ ].view_as("ak")
+ )
+ )
+
+ mask = get_spm_mask(lim, tge[idx_ch], tmin, tmax, pe, times)
+ pe = pe[mask]
+
+ if mode in ["energy_hc", "energy_dplms"]:
+ out[idx_ch] = out[idx_ch] + ak.to_numpy(ak.nansum(pe, axis=-1))
+
+ else:
+ out[idx_ch] = out[idx_ch] + ak.to_numpy(
+ ak.where(ak.nansum(pe, axis=-1) > lim, 1, 0)
+ )
+
+ return Array(nda=out)
+
+
+# get LAr energy per event over all channels
+def get_energy(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+) -> Array:
+ return get_spm_ene_or_maj(
+ f_hit,
+ f_tcm,
+ hit_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ "energy_hc",
+ )
+
+
+# get LAr majority per event over all channels
+def get_majority(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+) -> Array:
+ return get_spm_ene_or_maj(
+ f_hit,
+ f_tcm,
+ hit_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ "majority_hc",
+ )
+
+
+# get LAr energy (DPLMS estimator) per event over all channels
+def get_energy_dplms(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+) -> Array:
+ return get_spm_ene_or_maj(
+ f_hit,
+ f_tcm,
+ hit_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ "energy_dplms",
+ )
+
+
+# get LAr majority (DPLMS estimator) per event over all channels
+def get_majority_dplms(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+) -> Array:
+ return get_spm_ene_or_maj(
+ f_hit,
+ f_tcm,
+ hit_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ "majority_dplms",
+ )
+
+
+# Calculate the ETC in different trailing modes:
+# trail = 0: singlet window = [tge, tge + swin]
+# trail = 1: singlet window = [t_first_lar_pulse, t_first_lar_pulse + swin]
+# trail = 2: like trail = 1, but t_first_lar_pulse <= tge is enforced
+# min_first_pls_ene: minimum energy of the first pulse (only used if trail > 0)
+# max_per_channel: maximum number of p.e. a channel is allowed to have; channels
+# exceeding it are excluded
+def get_etc(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+ swin,
+ trail,
+ min_first_pls_ene,
+ max_per_channel,
+) -> Array:
+ # load TCM data to define an event
+ store = LH5Store()
+ ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np")
+ idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np")
+ pe_lst = []
+ time_lst = []
+
+ if isinstance(trgr, (float, int)):
+ tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1)
+ else:
+ tge = cast_trigger(trgr, tdefault, length=None)
+
+ for ch in chs:
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+
+ pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as(
+ "np"
+ )
+ tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan)
+ tmp[idx_ch] = pe
+ pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp)))
+
+ # times are in sample units
+ times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[
+ 0
+ ].view_as("np")
+ tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan)
+ tmp[idx_ch] = times
+ times = ak.drop_none(ak.nan_to_none(ak.Array(tmp)))
+
+ mask = get_spm_mask(lim, tge, tmin, tmax, pe, times)
+
+ pe = pe[mask]
+
+ # max pe mask
+ max_pe_mask = ak.nansum(pe, axis=-1) < max_per_channel
+ pe = ak.drop_none(
+ ak.nan_to_none(ak.where(max_pe_mask, pe, ak.Array([[np.nan]])))
+ )
+ pe_lst.append(pe)
+
+ times = times[mask] * 16
+ times = ak.drop_none(
+ ak.nan_to_none(ak.where(max_pe_mask, times, ak.Array([[np.nan]])))
+ )
+ time_lst.append(times)
+
+ pe_all = ak.concatenate(pe_lst, axis=-1)
+ time_all = ak.concatenate(time_lst, axis=-1)
+
+ if trail > 0:
+ t1d = ak.min(time_all[pe_all > min_first_pls_ene], axis=-1)
+
+ if trail == 2:
+ t1d = ak.where(t1d > tge, tge, t1d)
+
+ mask_total = time_all > t1d
+ mask_singlet = (time_all > t1d) & (time_all < t1d + swin)
+
+ else:
+ mask_total = time_all > tge
+ mask_singlet = (time_all > tge) & (time_all < tge + swin)
+
+ pe_singlet = ak.to_numpy(
+ ak.fill_none(ak.nansum(pe_all[mask_singlet], axis=-1), 0), allow_missing=False
+ )
+ pe_total = ak.to_numpy(
+ ak.fill_none(ak.nansum(pe_all[mask_total], axis=-1), 0), allow_missing=False
+ )
+ etc = np.divide(
+ pe_singlet, pe_total, out=np.full_like(pe_total, np.nan), where=pe_total != 0
+ )
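+    # etc is the fraction of the total collected light that falls inside the
+    # singlet window; events with no light in the total window are set to NaN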
+
+ return Array(nda=etc)
+
+
+# returns the time shift of the first LAr pulse relative to the Ge trigger
+def get_time_shift(
+ f_hit,
+ f_dsp,
+ f_tcm,
+ hit_group,
+ dsp_group,
+ tcm_group,
+ tcm_id_table_pattern,
+ chs,
+ lim,
+ trgr,
+ tdefault,
+ tmin,
+ tmax,
+) -> Array:
+ store = LH5Store()
+ # load TCM data to define an event
+ ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("np")
+ idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("np")
+ time_all = ak.Array([[] for x in range(np.max(idx) + 1)])
+
+ if isinstance(trgr, (float, int)):
+ tge = cast_trigger(trgr, tdefault, length=np.max(idx) + 1)
+ else:
+ tge = cast_trigger(trgr, tdefault, length=None)
+
+ for ch in chs:
+ idx_ch = idx[ids == utils.get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+
+ pe = store.read(f"{ch}/{hit_group}/energy_in_pe", f_hit, idx=idx_ch)[0].view_as(
+ "np"
+ )
+ tmp = np.full((np.max(idx) + 1, len(pe[0])), np.nan)
+ tmp[idx_ch] = pe
+ pe = ak.drop_none(ak.nan_to_none(ak.Array(tmp)))
+
+ # times are in sample units
+ times = store.read(f"{ch}/{hit_group}/trigger_pos", f_hit, idx=idx_ch)[
+ 0
+ ].view_as("np")
+ tmp = np.full((np.max(idx) + 1, len(times[0])), np.nan)
+ tmp[idx_ch] = times
+ times = ak.drop_none(ak.nan_to_none(ak.Array(tmp)))
+
+ mask = get_spm_mask(lim, tge, tmin, tmax, pe, times)
+
+ # apply mask and convert sample units to ns
+ times = times[mask] * 16
+
+ time_all = ak.concatenate((time_all, times), axis=-1)
+
+ out = ak.min(time_all, axis=-1)
+
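+    # events without any accepted LAr pulse have no minimum; fill_none below maps
+    # them to +inf, so their reported shift is infinite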
+ # Convert to 1D numpy array
+ out = ak.to_numpy(ak.fill_none(out, np.inf), allow_missing=False)
+ tge = ak.to_numpy(tge, allow_missing=False)
+
+ return Array(out - tge)
diff --git a/src/pygama/evt/tcm.py b/src/pygama/evt/tcm.py
index fea54d167..ad87d7a47 100644
--- a/src/pygama/evt/tcm.py
+++ b/src/pygama/evt/tcm.py
@@ -81,10 +81,10 @@ def generate_tcm_cols(
for ii, array in enumerate(coin_data):
array = np.array(array)
array_id = array_ids[ii] if array_ids is not None else ii
- array_id = np.full_like(array, array_id)
+ array_id = np.full_like(array, array_id, dtype=int)
col_dict = {"array_id": array_id, "coin_data": array}
if array_idxs is not None:
- col_dict["array_idx"] = array_idxs[ii]
+ col_dict["array_idx"] = array_idxs.astype(int)[ii]
dfs.append(pd.DataFrame(col_dict, copy=False)) # don't copy the data!
# concat and sort
diff --git a/src/pygama/evt/utils.py b/src/pygama/evt/utils.py
new file mode 100644
index 000000000..175cd868a
--- /dev/null
+++ b/src/pygama/evt/utils.py
@@ -0,0 +1,282 @@
+"""
+This module provides utilities to build the `evt` tier.
+"""
+
+from __future__ import annotations
+
+import re
+
+import awkward as ak
+import numpy as np
+from lgdo.lh5 import LH5Store
+from numpy.typing import NDArray
+
+
+def get_tcm_id_by_pattern(tcm_id_table_pattern: str, ch: str) -> int:
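+    # e.g. get_tcm_id_by_pattern("ch{}", "ch1027201") -> 1027201 (rawid illustrative)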
+ pre = tcm_id_table_pattern.split("{")[0]
+ post = tcm_id_table_pattern.split("}")[1]
+ return int(ch.strip(pre).strip(post))
+
+
+def get_table_name_by_pattern(tcm_id_table_pattern: str, ch_id: int) -> str:
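+    # e.g. get_table_name_by_pattern("ch{:07d}", 3) -> "ch0000003" (illustrative)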
+ # check tcm_id_table_pattern validity
+ pattern_check = re.findall(r"{([^}]*?)}", tcm_id_table_pattern)[0]
+ if pattern_check == "" or ":" == pattern_check[0]:
+ return tcm_id_table_pattern.format(ch_id)
+ else:
+ raise NotImplementedError(
+ "Only empty placeholders with format specifications are currently implemented"
+ )
+
+
+def num_and_pars(value: str, par_dic: dict):
+    # try to convert a string to an int, float or bool,
+    # or return the corresponding value if it is a key in par_dic
+ if value in par_dic.keys():
+ return par_dic[value]
+ try:
+ value = int(value)
+ except ValueError:
+ try:
+ value = float(value)
+ except ValueError:
+ try:
+ value = bool(value)
+ except ValueError:
+ pass
+ return value
+
+
+def find_parameters(
+ f_hit: str,
+ f_dsp: str,
+ ch: str,
+ idx_ch: NDArray,
+ exprl: list,
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> dict:
+    """Finds and returns parameters from the `hit` and `dsp` tiers.
+
+ Parameters
+ ----------
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ ch
+ "rawid" in the tiers.
+ idx_ch
+ index array of entries to be read from files.
+ exprl
+ list of tuples ``(tier, field)`` to be found in the `hit/dsp` tiers.
+ dsp_group
+ LH5 root group in dsp file.
+ hit_group
+ LH5 root group in hit file.
+ """
+
+ # find fields in either dsp, hit
+ dsp_flds = [e[1] for e in exprl if e[0] == dsp_group]
+ hit_flds = [e[1] for e in exprl if e[0] == hit_group]
+
+ store = LH5Store()
+ hit_dict, dsp_dict = {}, {}
+ if len(hit_flds) > 0:
+ hit_ak = store.read(
+ f"{ch.replace('/','')}/{hit_group}/", f_hit, field_mask=hit_flds, idx=idx_ch
+ )[0].view_as("ak")
+ hit_dict = dict(
+ zip([f"{hit_group}_" + e for e in ak.fields(hit_ak)], ak.unzip(hit_ak))
+ )
+ if len(dsp_flds) > 0:
+ dsp_ak = store.read(
+ f"{ch.replace('/','')}/{dsp_group}/", f_dsp, field_mask=dsp_flds, idx=idx_ch
+ )[0].view_as("ak")
+ dsp_dict = dict(
+ zip([f"{dsp_group}_" + e for e in ak.fields(dsp_ak)], ak.unzip(dsp_ak))
+ )
+
+ return hit_dict | dsp_dict
+
+
+def get_data_at_channel(
+ ch: str,
+ ids: NDArray,
+ idx: NDArray,
+ expr: str,
+ exprl: list,
+ var_ph: dict,
+ is_evaluated: bool,
+ f_hit: str,
+ f_dsp: str,
+ defv,
+ tcm_id_table_pattern: str = "ch{}",
+ evt_group: str = "evt",
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> np.ndarray:
+ """Evaluates an expression and returns the result.
+
+ Parameters
+ ----------
+ ch
+ "rawid" of channel to be evaluated.
+ idx
+ `tcm` index array.
+ ids
+ `tcm` id array.
+ expr
+ expression to be evaluated.
+ exprl
+ list of parameter-tuples ``(root_group, field)`` found in the expression.
+ var_ph
+ dict of additional parameters that are not channel dependent.
+ is_evaluated
+ if false, the expression does not get evaluated but an array of default
+ values is returned.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ defv
+ default value.
+    tcm_id_table_pattern
+        pattern to format `tcm` id values into table names in higher tiers. Must
+        have exactly one placeholder, which is substituted with the `tcm` id
+        (e.g. ``ch{}``).
+ dsp_group
+ LH5 root group in dsp file.
+ hit_group
+ LH5 root group in hit file.
+ evt_group
+ LH5 root group in evt file.
+ """
+
+ # get index list for this channel to be loaded
+ idx_ch = idx[ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch)]
+ outsize = len(idx_ch)
+
+ if not is_evaluated:
+ res = np.full(outsize, defv, dtype=type(defv))
+ elif "tcm.array_id" == expr:
+ res = np.full(
+ outsize, get_tcm_id_by_pattern(tcm_id_table_pattern, ch), dtype=int
+ )
+ elif "tcm.index" == expr:
+ res = np.where(ids == get_tcm_id_by_pattern(tcm_id_table_pattern, ch))[0]
+ else:
+ var = find_parameters(
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ ch=ch,
+ idx_ch=idx_ch,
+ exprl=exprl,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+
+ if var_ph is not None:
+ var = var | var_ph
+
+ # evaluate expression
+        # replace tier prefixes: "dsp." and "hit." become "dsp_" and "hit_",
+        # while the "evt." prefix is dropped (e.g. hit.foo -> hit_foo, evt.foo -> foo)
+ res = eval(
+ expr.replace(f"{dsp_group}.", f"{dsp_group}_")
+ .replace(f"{hit_group}.", f"{hit_group}_")
+ .replace(f"{evt_group}.", ""),
+ var,
+ )
+
+    # if the expression evaluates to a single value, broadcast it to the full length
+ if (not hasattr(res, "__len__")) or (isinstance(res, str)):
+ return np.full(outsize, res)
+
+    # the resulting array needs to be 1D after the operation; this can only
+    # change once LGDOs with more than two dimensions are supported.
+    # ak.to_numpy() raises an error if the array is not regular
+ res = ak.to_numpy(res, allow_missing=False)
+
+ # in this method only 1D values are allowed
+ if res.ndim > 1:
+ raise ValueError(
+            f"expression '{expr}' must return a 1D array. If you are using "
+            "VectorOfVectors or ArrayOfEqualSizedArrays, use awkward reduction "
+            "functions to reduce the dimension"
+ )
+
+ return res
+
+
+def get_mask_from_query(
+ qry: str | NDArray,
+ length: int,
+ ch: str,
+ idx_ch: NDArray,
+ f_hit: str,
+ f_dsp: str,
+ hit_group: str = "hit",
+ dsp_group: str = "dsp",
+) -> np.ndarray:
+ """Evaluates a query expression and returns a mask accordingly.
+
+ Parameters
+ ----------
+ qry
+ query expression.
+ length
+ length of the return mask.
+ ch
+ "rawid" of channel to be evaluated.
+ idx_ch
+ channel indices to be read.
+ f_hit
+ path to `hit` tier file.
+ f_dsp
+ path to `dsp` tier file.
+ hit_group
+ LH5 root group in hit file.
+ dsp_group
+ LH5 root group in dsp file.
+ """
+
+    # if the query is given as a string, evaluate it on the hit/dsp tiers
+ if isinstance(qry, str):
+ qry_lst = re.findall(r"(hit|dsp).([a-zA-Z_$][\w$]*)", qry)
+ qry_var = find_parameters(
+ f_hit=f_hit,
+ f_dsp=f_dsp,
+ ch=ch,
+ idx_ch=idx_ch,
+ exprl=qry_lst,
+ hit_group=hit_group,
+ dsp_group=dsp_group,
+ )
+ limarr = eval(
+ qry.replace(f"{dsp_group}.", f"{dsp_group}_").replace(
+ f"{hit_group}.", f"{hit_group}_"
+ ),
+ qry_var,
+ )
+
+        # if the expression evaluates to a single value, broadcast it to the full length
+ if (not hasattr(limarr, "__len__")) or (isinstance(limarr, str)):
+ return np.full(len(idx_ch), limarr)
+
+ limarr = ak.to_numpy(limarr, allow_missing=False)
+ if limarr.ndim > 1:
+ raise ValueError(
+                f"query '{qry}' must return a 1D array. If you are using "
+                "VectorOfVectors or ArrayOfEqualSizedArrays, use awkward "
+                "reduction functions to reduce the dimension"
+ )
+
+ # or forward the array
+ elif isinstance(qry, np.ndarray):
+ limarr = qry
+
+ # if no condition, it must be true
+ else:
+ limarr = np.ones(length).astype(bool)
+
+ # explicit cast to bool
+ if limarr.dtype != bool:
+ limarr = limarr.astype(bool)
+
+ return limarr
diff --git a/src/pygama/flow/data_loader.py b/src/pygama/flow/data_loader.py
index ab4ed5150..7e5c38616 100644
--- a/src/pygama/flow/data_loader.py
+++ b/src/pygama/flow/data_loader.py
@@ -14,7 +14,9 @@
import numpy as np
import pandas as pd
from dspeed.vis import WaveformBrowser
-from lgdo import Array, LH5Iterator, LH5Store, Struct, Table, lgdo_utils
+from lgdo.lh5 import LH5Iterator, LH5Store
+from lgdo.lh5.utils import expand_vars
+from lgdo.types import Array, Struct, Table
from lgdo.types.vectorofvectors import build_cl, explode_arrays, explode_cl
from tqdm.auto import tqdm
@@ -193,9 +195,7 @@ def set_config(self, config: dict | str) -> None:
# look for info in configuration if FileDB is not set
if self.filedb is None:
# expand $_ variables
- value = lgdo_utils.expand_vars(
- config["filedb"], substitute={"_": config_dir}
- )
+ value = expand_vars(config["filedb"], substitute={"_": config_dir})
self.filedb = FileDB(value)
if not os.path.isdir(self.filedb.data_dir):
@@ -505,6 +505,8 @@ def build_entry_list(
# Find out which columns are needed for any cuts
cut_cols = {}
+ # ... and pre-load which tiers need to be loaded to make the cuts
+ col_tiers_dict = {}
for level in [child, parent]:
cut_cols[level] = []
@@ -527,6 +529,9 @@ def build_entry_list(
and save_output_columns
):
for_output.append(term)
+ col_tiers_dict[level] = self.get_tiers_for_col(
+ cut_cols[level], merge_files=False
+ )
if save_output_columns:
entry_cols += for_output
@@ -579,17 +584,17 @@ def build_entry_list(
tcm_table_name = self.filedb.get_table_name(tcm_tier, tcm_tb)
try:
- tcm_lgdo, _ = sto.read_object(tcm_table_name, tcm_path)
+ tcm_lgdo, _ = sto.read(tcm_table_name, tcm_path)
except KeyError:
log.warning(f"Cannot find table {tcm_table_name} in file {tcm_path}")
continue
- # Have to do some hacky stuff until I get a get_dataframe() method
+ # Have to do some hacky stuff until I get a view_as("pd") method
tcm_lgdo[self.tcms[tcm_level]["tcm_cols"]["child_idx"]] = Array(
nda=explode_cl(tcm_lgdo["cumulative_length"].nda)
)
tcm_lgdo.pop("cumulative_length")
tcm_tb = Table(col_dict=tcm_lgdo)
- f_entries = tcm_tb.get_dataframe()
+ f_entries = tcm_tb.view_as("pd")
renaming = {
self.tcms[tcm_level]["tcm_cols"]["child_idx"]: f"{child}_idx",
self.tcms[tcm_level]["tcm_cols"]["parent_tb"]: f"{parent}_table",
@@ -611,7 +616,7 @@ def build_entry_list(
if level in self.cuts.keys():
cut = self.cuts[level]
- col_tiers = self.get_tiers_for_col(cut_cols[level], merge_files=False)
+ col_tiers = col_tiers_dict[level]
# Tables in first tier of event should be the same for all tiers in one level
tables = self.filedb.df.loc[file, f"{self.tiers[level][0]}_tables"]
@@ -644,7 +649,7 @@ def build_entry_list(
if tb in col_tiers[file]["tables"][tier]:
table_name = self.filedb.get_table_name(tier, tb)
try:
- tier_table, _ = sto.read_object(
+ tier_table, _ = sto.read(
table_name,
tier_path,
field_mask=cut_cols[level],
@@ -661,7 +666,7 @@ def build_entry_list(
tb_table.join(tier_table)
if tb_table is None:
continue
- tb_df = tb_table.get_dataframe()
+ tb_df = tb_table.view_as("pd")
tb_df.query(cut, inplace=True)
idx_match = f_entries.query(f"{level}_idx in {list(tb_df.index)}")
if level == parent:
@@ -703,11 +708,9 @@ def build_entry_list(
f_dict = f_entries.to_dict("list")
f_struct = Struct(f_dict)
if self.merge_files:
- sto.write_object(f_struct, "entries", output_file, wo_mode="a")
+ sto.write(f_struct, "entries", output_file, wo_mode="a")
else:
- sto.write_object(
- f_struct, f"entries/{file}", output_file, wo_mode="a"
- )
+ sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")
if log.getEffectiveLevel() >= logging.INFO:
progress_bar.close()
@@ -857,7 +860,7 @@ def build_hit_entries(
# load the data from the tier file, just the columns needed for the cut
table_name = self.filedb.get_table_name(tier, tb)
try:
- tier_tb, _ = sto.read_object(
+ tier_tb, _ = sto.read(
table_name, tier_path, field_mask=cut_cols
)
except KeyError:
@@ -865,7 +868,7 @@ def build_hit_entries(
f"Cannot find {table_name} in file {tier_path}"
)
continue
- # join eveything in one table
+ # join everything in one table
if tb_table is None:
tb_table = tier_tb
else:
@@ -875,7 +878,7 @@ def build_hit_entries(
continue
# convert to DataFrame and apply cuts
- tb_df = tb_table.get_dataframe()
+ tb_df = tb_table.view_as("pd")
tb_df.query(cut, inplace=True)
tb_df[f"{low_level}_table"] = tb
tb_df[f"{low_level}_idx"] = tb_df.index
@@ -897,11 +900,9 @@ def build_hit_entries(
f_dict = f_entries.to_dict("list")
f_struct = Struct(f_dict)
if self.merge_files:
- sto.write_object(f_struct, "entries", output_file, wo_mode="a")
+ sto.write(f_struct, "entries", output_file, wo_mode="a")
else:
- sto.write_object(
- f_struct, f"entries/{file}", output_file, wo_mode="a"
- )
+ sto.write(f_struct, f"entries/{file}", output_file, wo_mode="a")
if log.getEffectiveLevel() >= logging.INFO:
progress_bar.close()
@@ -1112,7 +1113,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
for file in files
]
- tier_table, _ = sto.read_object(
+ tier_table, _ = sto.read(
name=tb_name,
lh5_file=tier_paths,
idx=idx_mask,
@@ -1138,12 +1139,12 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
f_table = utils.dict_to_table(col_dict=col_dict, attr_dict=attr_dict)
if output_file:
- sto.write_object(f_table, "merged_data", output_file, wo_mode="o")
+ sto.write(f_table, "merged_data", output_file, wo_mode="o")
if in_memory:
if self.output_format == "lgdo.Table":
return f_table
elif self.output_format == "pd.DataFrame":
- return f_table.get_dataframe()
+ return f_table.view_as("pd")
else:
raise ValueError(
f"'{self.output_format}' output format not supported"
@@ -1215,7 +1216,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
raise FileNotFoundError(tier_path)
table_name = self.filedb.get_table_name(tier, tb)
- tier_table, _ = sto.read_object(
+ tier_table, _ = sto.read(
table_name,
tier_path,
idx=idx_mask,
@@ -1241,7 +1242,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
if in_memory:
load_out.add_field(name=file, obj=f_table)
if output_file:
- sto.write_object(f_table, f"{file}", output_file, wo_mode="o")
+ sto.write(f_table, f"{file}", output_file, wo_mode="o")
# end file loop
if log.getEffectiveLevel() >= logging.INFO:
@@ -1254,7 +1255,7 @@ def explode_evt_cols(el: pd.DataFrame, tier_table: Table):
return load_out
elif self.output_format == "pd.DataFrame":
for file in load_out.keys():
- load_out[file] = load_out[file].get_dataframe()
+ load_out[file] = load_out[file].view_as("pd")
return load_out
else:
raise ValueError(
@@ -1313,7 +1314,7 @@ def load_evts(
)
if os.path.exists(tier_path):
table_name = self.filedb.get_table_name(tier, tb)
- tier_table, _ = sto.read_object(
+ tier_table, _ = sto.read(
table_name,
tier_path,
idx=idx_mask,
@@ -1327,7 +1328,7 @@ def load_evts(
if in_memory:
load_out[file] = f_table
if output_file:
- sto.write_object(f_table, f"file{file}", output_file, wo_mode="o")
+ sto.write(f_table, f"file{file}", output_file, wo_mode="o")
# end file loop
if in_memory:
@@ -1335,7 +1336,7 @@ def load_evts(
return load_out
elif self.output_format == "pd.DataFrame":
for file in load_out.keys():
- load_out[file] = load_out[file].get_dataframe()
+ load_out[file] = load_out[file].view_as("pd")
return load_out
else:
raise ValueError(
diff --git a/src/pygama/flow/file_db.py b/src/pygama/flow/file_db.py
index c64e6b786..fdca65b2d 100644
--- a/src/pygama/flow/file_db.py
+++ b/src/pygama/flow/file_db.py
@@ -9,11 +9,12 @@
import warnings
import h5py
-import lgdo
import numpy as np
import pandas as pd
-from lgdo import Array, Scalar, VectorOfVectors
-from lgdo import lh5_store as lh5
+from lgdo.lh5 import ls
+from lgdo.lh5.store import LH5Store
+from lgdo.lh5.utils import expand_path, expand_vars
+from lgdo.types import Array, Scalar, VectorOfVectors
from parse import parse
from . import utils
@@ -185,14 +186,12 @@ def set_config(self, config: dict, config_path: str = None) -> None:
if config_path is not None:
subst_vars["_"] = os.path.dirname(str(config_path))
- data_dir = lgdo.lgdo_utils.expand_path(
- self.config["data_dir"], substitute=subst_vars
- )
+ data_dir = expand_path(self.config["data_dir"], substitute=subst_vars)
self.data_dir = data_dir
tier_dirs = self.config["tier_dirs"]
for k, val in tier_dirs.items():
- tier_dirs[k] = lgdo.lgdo_utils.expand_vars(val, substitute=subst_vars)
+ tier_dirs[k] = expand_vars(val, substitute=subst_vars)
self.tier_dirs = tier_dirs
def scan_files(self, dirs: list[str] = None) -> None:
@@ -274,7 +273,10 @@ def scan_files(self, dirs: list[str] = None) -> None:
# convert cols to numeric dtypes where possible
for col in self.df.columns:
- self.df[col] = pd.to_numeric(self.df[col], errors="ignore")
+ try:
+ self.df[col] = pd.to_numeric(self.df[col])
+ except ValueError:
+ continue
# sort rows according to timestamps
utils.inplace_sort(self.df, self.sortby)
@@ -407,7 +409,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
)
# TODO this call here is really expensive!
- groups = lh5.ls(f, wildcard)
+ groups = ls(f, wildcard)
if len(groups) > 0 and parse(template, groups[0]) is None:
log.warning(f"groups in {fpath} don't match template")
else:
@@ -431,7 +433,7 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
table_name = template
try:
- col = lh5.ls(f[table_name])
+ col = ls(f[table_name])
except KeyError:
log.warning(f"cannot find '{table_name}' in {fpath}")
continue
@@ -477,8 +479,8 @@ def update_tables_cols(row, tier: str, utc_cache: dict = None) -> pd.Series:
columns_vov = VectorOfVectors(
flattened_data=flattened, cumulative_length=length
)
- sto = lh5.LH5Store()
- sto.write_object(columns_vov, "unique_columns", to_file)
+ sto = LH5Store()
+ sto.write(columns_vov, "unique_columns", to_file)
return self.columns
@@ -501,12 +503,12 @@ def from_disk(self, path: str | list[str]) -> None:
# expand wildcards
paths = []
for p in path:
- paths += lgdo.lgdo_utils.expand_path(p, list=True)
+ paths += expand_path(p, list=True)
if not paths:
raise FileNotFoundError(path)
- sto = lh5.LH5Store()
+ sto = LH5Store()
# objects/accumulators that will be used to configure the FileDB at the end
_cfg = None
_df = None
@@ -528,7 +530,7 @@ def _replace_idx(row, trans, tier):
# loop over the files
for p in paths:
- cfg, _ = sto.read_object("config", p)
+ cfg, _ = sto.read("config", p)
cfg = json.loads(cfg.value.decode())
# make sure configurations are all the same
@@ -540,7 +542,7 @@ def _replace_idx(row, trans, tier):
)
# read in unique columns
- vov, _ = sto.read_object("columns", p)
+ vov, _ = sto.read("columns", p)
# Convert back from VoV of UTF-8 bytestrings to a list of lists of strings
columns = [[v.decode("utf-8") for v in ov] for ov in list(vov)]
@@ -599,14 +601,12 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
filename
output LH5 file name.
wo_mode
- passed to :meth:`~.lgdo.lh5_store.write_object`.
+ passed to :meth:`~.lgdo.lh5.write`.
"""
log.debug(f"writing database to {filename}")
- sto = lh5.LH5Store()
- sto.write_object(
- Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode
- )
+ sto = LH5Store()
+ sto.write(Scalar(json.dumps(self.config)), "config", filename, wo_mode=wo_mode)
if wo_mode in ["write_safe", "w", "overwrite_file", "of"]:
wo_mode = "a"
@@ -623,7 +623,7 @@ def to_disk(self, filename: str, wo_mode="write_safe") -> None:
flattened_data=Array(nda=np.array(flat).astype("S")),
cumulative_length=Array(nda=np.array(cum_l)),
)
- sto.write_object(col_vov, "columns", filename, wo_mode=wo_mode)
+ sto.write(col_vov, "columns", filename, wo_mode=wo_mode)
# FIXME: to_hdf() throws this:
#
@@ -673,7 +673,10 @@ def scan_daq_files(self, daq_dir: str, daq_template: str) -> None:
# convert cols to numeric dtypes where possible
for col in self.df.columns:
- self.df[col] = pd.to_numeric(self.df[col], errors="ignore")
+ try:
+ self.df[col] = pd.to_numeric(self.df[col])
+ except ValueError:
+ continue
def get_table_name(self, tier: str, tb: str) -> str:
"""Get the table name for a tier given its table identifier.
diff --git a/src/pygama/flow/utils.py b/src/pygama/flow/utils.py
index c51cd6898..eef8b3983 100644
--- a/src/pygama/flow/utils.py
+++ b/src/pygama/flow/utils.py
@@ -122,12 +122,7 @@ def fill_col_dict(
(table_length, len(tier_table[col].nda[0])),
dtype=tier_table[col].dtype,
)
- try:
- col_dict[col][tcm_idx] = tier_table[col].nda
- except BaseException:
- raise ValueError(
- f"self.aoesa_to_vov is False but {col} is a jagged array"
- )
+ col_dict[col][tcm_idx] = tier_table[col].nda
elif isinstance(tier_table[col], VectorOfVectors):
# Allocate memory for column for all channels
if col not in col_dict.keys():
diff --git a/src/pygama/hit/build_hit.py b/src/pygama/hit/build_hit.py
index e531fa872..2a6d6a066 100644
--- a/src/pygama/hit/build_hit.py
+++ b/src/pygama/hit/build_hit.py
@@ -7,9 +7,11 @@
import logging
import os
from collections import OrderedDict
+from typing import Iterable, Mapping
+import lgdo
import numpy as np
-from lgdo import LH5Iterator, LH5Store, ls
+from lgdo.lh5 import LH5Iterator, LH5Store, ls
log = logging.getLogger(__name__)
@@ -17,18 +19,20 @@
def build_hit(
infile: str,
outfile: str = None,
- hit_config: str | dict = None,
- lh5_tables: list[str] = None,
- lh5_tables_config: str | dict[str] = None,
+ hit_config: str | Mapping = None,
+ lh5_tables: Iterable[str] = None,
+ lh5_tables_config: str | Mapping[str, Mapping] = None,
n_max: int = np.inf,
wo_mode: str = "write_safe",
buffer_len: int = 3200,
) -> None:
"""
- Transform a :class:`~.lgdo.Table` into a new :class:`~.lgdo.Table` by
- evaluating strings describing column operations.
+ Transform a :class:`~lgdo.types.table.Table` into a new
+ :class:`~lgdo.types.table.Table` by evaluating strings describing column
+ operations.
- Operates on columns only, not specific rows or elements.
+ Operates on columns only, not specific rows or elements. Relies on
+ :meth:`~lgdo.types.table.Table.eval`.
Parameters
----------
@@ -44,14 +48,14 @@ def build_hit(
.. code-block:: json
{
- "outputs": ["calE", "AoE"],
- "operations": {
- "calE": {
- "expression": "sqrt(@a + @b * trapEmax**2)",
- "parameters": {"a": "1.23", "b": "42.69"},
- },
- "AoE": {"expression": "A_max/calE"},
- }
+ "outputs": ["calE", "AoE"],
+ "operations": {
+ "calE": {
+ "expression": "sqrt(a + b * trapEmax**2)",
+ "parameters": {"a": "1.23", "b": "42.69"},
+ },
+ "AoE": {"expression": "A_max/calE"},
+ }
}
The ``outputs`` array lists columns that will be effectively written in
@@ -69,7 +73,11 @@ def build_hit(
n_max
maximum number of rows to process
wo_mode
- forwarded to :meth:`~.lgdo.lh5_store.write_object`.
+ forwarded to :meth:`lgdo.lh5.store.LH5Store.write`.
+
+ See Also
+ --------
+ lgdo.types.table.Table.eval
"""
store = LH5Store()
@@ -93,16 +101,14 @@ def build_hit(
for k, v in tbl_cfg.items():
if isinstance(v, str):
with open(v) as f:
- # order in hit configs is important (dependencies)
- tbl_cfg[k] = json.load(f, object_pairs_hook=OrderedDict)
+ tbl_cfg[k] = json.load(f)
lh5_tables_config = tbl_cfg
else:
if isinstance(hit_config, str):
# sanitize config
with open(hit_config) as f:
- # order in hit configs is important (dependencies)
- hit_config = json.load(f, object_pairs_hook=OrderedDict)
+ hit_config = json.load(f)
if lh5_tables is None:
lh5_tables_config = {}
@@ -113,11 +119,19 @@ def build_hit(
if f"{el}/dsp" in ls(infile, f"{el}/"):
log.debug(f"found candidate table /{el}/dsp")
lh5_tables_config[f"{el}/dsp"] = hit_config
+ else:
+ for tbl in lh5_tables:
+ lh5_tables_config[tbl] = hit_config
if outfile is None:
outfile = os.path.splitext(os.path.basename(infile))[0]
outfile = outfile.removesuffix("_dsp") + "_hit.lh5"
+ # reorder blocks in "operations" based on dependency
+ log.debug("reordering operations based on mutual dependency")
+ for cfg in lh5_tables_config.values():
+ cfg["operations"] = _reorder_table_operations(cfg["operations"])
+
first_done = False
for tbl, cfg in lh5_tables_config.items():
lh5_it = LH5Iterator(infile, tbl, buffer_len=buffer_len)
@@ -129,7 +143,40 @@ def build_hit(
for tbl_obj, start_row, n_rows in lh5_it:
n_rows = min(tot_n_rows - start_row, n_rows)
- outtbl_obj = tbl_obj.eval(cfg["operations"])
+ # create a new table object that links all the columns in the
+ # current table (i.e. no copy)
+ outtbl_obj = lgdo.Table(col_dict=tbl_obj)
+
+ for outname, info in cfg["operations"].items():
+ outcol = outtbl_obj.eval(
+ info["expression"], info.get("parameters", None)
+ )
+ if "lgdo_attrs" in info:
+ outcol.attrs |= info["lgdo_attrs"]
+
+ outtbl_obj.add_column(outname, outcol)
+
+ # make high level flags
+ if "aggregations" in cfg:
+ for high_lvl_flag, flags in cfg["aggregations"].items():
+ flags_list = list(flags.values())
+ n_flags = len(flags_list)
+ if n_flags <= 8:
+ flag_dtype = np.uint8
+ elif n_flags <= 16:
+ flag_dtype = np.uint16
+ elif n_flags <= 32:
+ flag_dtype = np.uint32
+ else:
+ flag_dtype = np.uint64
+
+ df_flags = outtbl_obj.view_as("pd", cols=flags_list)
+ flag_values = df_flags.values.astype(flag_dtype)
+
+ multiplier = 2 ** np.arange(n_flags, dtype=flag_values.dtype)
+ flag_out = np.dot(flag_values, multiplier)
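+                    # e.g. three flags [1, 0, 1] pack into 1*1 + 0*2 + 1*4 = 5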
+
+ outtbl_obj.add_field(high_lvl_flag, lgdo.Array(flag_out))
# remove or add columns according to "outputs" in the configuration
# dictionary
@@ -137,7 +184,7 @@ def build_hit(
if isinstance(cfg["outputs"], list):
# add missing columns (forwarding)
for out in cfg["outputs"]:
- if out not in outtbl_obj.keys():
+ if out not in outtbl_obj:
outtbl_obj.add_column(out, tbl_obj[out])
# remove non-required columns
@@ -146,7 +193,7 @@ def build_hit(
if col not in cfg["outputs"]:
outtbl_obj.remove_column(col, delete=True)
- store.write_object(
+ store.write(
obj=outtbl_obj,
name=tbl.replace("/dsp", "/hit"),
lh5_file=outfile,
@@ -156,3 +203,59 @@ def build_hit(
)
first_done = True
+
+
+def _reorder_table_operations(
+ config: Mapping[str, Mapping]
+) -> OrderedDict[str, Mapping]:
+ """Reorder operations in `config` according to mutual dependency."""
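+    # e.g. an input {"AoE": {"expression": "A_max/calE"}, "calE": {...}} (cf. the
+    # config example in the build_hit docstring) comes back with "calE" ordered
+    # before "AoE", since the latter depends on it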
+
+ def _one_pass(config):
+ """Loop once over `config` and do a first round of reordering"""
+ # list to hold reordered config keys
+ ordered_keys = []
+
+ # start looping over config
+ for outname in config:
+ # initialization
+ if not ordered_keys:
+ ordered_keys.append(outname)
+ continue
+
+ if outname in ordered_keys:
+ raise RuntimeError(f"duplicated operation '{outname}' detected")
+
+ # loop over existing reordered keys and figure out where to place
+ # the new key
+ idx = 0
+ for k in ordered_keys:
+ # get valid names in the expression
+ c = compile(
+ config[k]["expression"], "gcc -O3 -ffast-math build_hit.py", "eval"
+ )
+
+ # if we need "outname" for this expression, insert it before!
+ if outname in c.co_names:
+ break
+ else:
+ idx += 1
+
+ ordered_keys.insert(idx, outname)
+
+ # now replay the config dictionary based on sorted keys
+ opdict = OrderedDict()
+ for k in ordered_keys:
+ opdict[k] = config[k]
+
+ return opdict
+
+ # okay, now we need to repeat this until we've sorted everything
+ current = OrderedDict(config)
+
+ while True:
+ new = _one_pass(current)
+
+ if new == current:
+ return new
+ else:
+ current = new
diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py
index 86a23e08b..227aec4e2 100644
--- a/src/pygama/pargen/AoE_cal.py
+++ b/src/pygama/pargen/AoE_cal.py
@@ -15,7 +15,7 @@
import matplotlib as mpl
mpl.use("agg")
-import lgdo.lh5_store as lh5
+import lgdo.lh5 as lh5
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
@@ -27,10 +27,9 @@
from matplotlib.colors import LogNorm
from scipy.stats import chi2
-import pygama.math.distributions as pgd
import pygama.math.histogram as pgh
-import pygama.math.hpge_peak_fitting as pghpf
-from pygama.math.functions.error_function import nb_erfc
+import pygama.math.peak_fitting as pgf
+from pygama.math.peak_fitting import nb_erfc
from pygama.pargen.energy_cal import get_i_local_maxima
from pygama.pargen.utils import *
@@ -68,10 +67,10 @@ def pdf(
PDF for A/E consists of a gaussian signal with gaussian tail background
"""
try:
- sig = n_sig * pgd.gaussian.get_pdf(x, mu, sigma)
- x_lo = np.nanmin(x) if lower_range == np.inf else lower_range
- x_hi = np.nanmax(x) if upper_range == np.inf else upper_range
- bkg = n_bkg * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_bkg)
+ sig = n_sig * pgf.gauss_norm(x, mu, sigma)
+ bkg = n_bkg * pgf.gauss_tail_norm(
+ x, mu, sigma, tau_bkg, lower_range, upper_range
+ )
except:
sig = np.full_like(x, np.nan)
bkg = np.full_like(x, np.nan)
@@ -128,7 +127,7 @@ def guess(hist, bins, var, **kwargs):
try:
_, sigma, _ = pgh.get_gaussian_guess(hist, bins)
except:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist, bins, var, mode_guess=mu, n_bins=20
)
_, sigma, _ = pars
@@ -215,13 +214,14 @@ def pdf(
PDF for A/E consists of a gaussian signal with tail with gaussian tail background
"""
try:
- x_lo = np.nanmin(x) if lower_range == np.inf else lower_range
- x_hi = np.nanmax(x) if upper_range == np.inf else upper_range
sig = n_sig * (
- (1 - htail) * pgd.gaussian.get_pdf(x, mu, sigma)
- + htail * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_sig)
+ (1 - htail) * pgf.gauss_norm(x, mu, sigma)
+ + htail
+ * pgf.gauss_tail_norm(x, mu, sigma, tau_sig, lower_range, upper_range)
+ )
+ bkg = n_bkg * pgf.gauss_tail_norm(
+ x, mu, sigma, tau_bkg, lower_range, upper_range
)
- bkg = n_bkg * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_bkg)
except:
sig = np.full_like(x, np.nan)
bkg = np.full_like(x, np.nan)
@@ -283,7 +283,7 @@ def guess(hist, bins, var, **kwargs):
try:
_, sigma, _ = pgh.get_gaussian_guess(hist, bins)
except:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist, bins, var, mode_guess=mu, n_bins=20
)
_, sigma, _ = pars
@@ -352,7 +352,7 @@ def fixed(**kwargs):
]
def width(pars, errs, cov):
- fwhm, fwhm_err = pghpf.hpge_peak_fwhm(
+ fwhm, fwhm_err = pgf.radford_fwhm(
pars[2], pars[3], np.abs(pars[4]), cov=cov[:7, :7]
)
return fwhm / 2.355, fwhm_err / 2.355
@@ -375,9 +375,9 @@ def pdf(
PDF for A/E consists of a gaussian signal with tail with gaussian tail background
"""
try:
- x_lo = np.nanmin(x) if lower_range == np.inf else lower_range
- x_hi = np.nanmax(x) if upper_range == np.inf else upper_range
- sig = n_events * pgd.exgauss.pdf_norm(x, x_lo, x_hi, mu, sigma, tau_bkg)
+ sig = n_events * pgf.gauss_tail_norm(
+ x, mu, sigma, tau_bkg, lower_range, upper_range
+ )
except:
sig = np.full_like(x, np.nan)
@@ -406,7 +406,7 @@ def guess(hist, bins, var, **kwargs):
try:
_, sigma, _ = pgh.get_gaussian_guess(hist, bins)
except:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist, bins, var, mode_guess=mu, n_bins=20
)
_, sigma, _ = pars
@@ -469,7 +469,7 @@ def pdf(x: np.array, n_events: float, mu: float, sigma: float) -> np.array:
PDF for A/E consists of a gaussian signal with tail with gaussian tail background
"""
try:
- sig = n_events * pgd.gaussian.get_pdf(x, mu, sigma)
+ sig = n_events * pgf.gauss_norm(x, mu, sigma)
except:
sig = np.full_like(x, np.nan)
@@ -489,7 +489,7 @@ def guess(hist, bins, var, **kwargs):
try:
_, sigma, _ = pgh.get_gaussian_guess(hist, bins)
except:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist, bins, var, mode_guess=mu, n_bins=20
)
_, sigma, _ = pars
@@ -540,10 +540,8 @@ def pdf(
tau2,
components,
):
- gauss1 = n_sig1 * pgd.gauss_on_exgauss.get_pdf(x, mu1, sigma1, htail1, tau1)
- gauss2 = n_sig2 * pgd.gauss_on_exgauss.get_pdf(
- x, mu2, sigma2, tau2, htail2
- ) # NOTE: are tau2 and htail2 in the intended order?
+ gauss1 = n_sig1 * pgf.gauss_with_tail_pdf(x, mu1, sigma1, htail1, tau1)
+ gauss2 = n_sig2 * pgf.gauss_with_tail_pdf(x, mu2, sigma2, tau2, htail2)
if components is True:
return gauss1, gauss2
else:
@@ -609,7 +607,7 @@ def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list:
mu1 = bcs[mus[0]]
mu2 = bcs[mus[-1]]
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist,
bins,
var=None,
@@ -622,7 +620,7 @@ def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list:
mu1, sigma1, amp = pars
ix = np.where(bcs < mu1 + 3 * sigma1)[0][-1]
n_sig1 = np.sum(hist[:ix])
- pars2, cov2 = pgbf.gauss_mode_width_max(
+ pars2, cov2 = pgf.gauss_mode_width_max(
hist,
bins,
var=None,
@@ -911,7 +909,7 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range):
"""
Simple guess for peak fitting
"""
- if func_i == pgd.hpge_peak.pdf_ext:
+ if func_i == pgf.extended_radford_pdf:
bin_cs = (bins[1:] + bins[:-1]) / 2
sigma = eres / 2.355
i_0 = np.nanargmax(hist)
@@ -934,8 +932,6 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range):
if nsig_guess < 0:
nsig_guess = 0
parguess = [
- fit_range[0],
- fit_range[1],
nsig_guess,
mu,
sigma,
@@ -943,13 +939,16 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range):
tau,
nbkg_guess,
hstep,
+ fit_range[0],
+ fit_range[1],
+ 0,
]
for i, guess in enumerate(parguess):
if np.isnan(guess):
parguess[i] = 0
return parguess
- elif func_i == pgd.gauss_on_step.pdf_ext:
+ elif func_i == pgf.extended_gauss_step_pdf:
mu = peak
sigma = eres / 2.355
i_0 = np.argmax(hist)
@@ -966,13 +965,14 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range):
nsig_guess = 0
parguess = [
- fit_range[0],
- fit_range[1],
nsig_guess,
mu,
sigma,
nbkg_guess,
hstep,
+ fit_range[0],
+ fit_range[1],
+ 0,
]
for i, guess in enumerate(parguess):
if np.isnan(guess):
@@ -997,7 +997,7 @@ def unbinned_energy_fit(
energy, dx=0.5, range=(np.nanmin(energy), np.nanmax(energy))
)
except ValueError:
- pars, errs, cov = return_nans(pgd.hpge_peak.get_pdf)
+ pars, errs, cov = return_nans(pgf.radford_pdf)
return pars, errs
sigma = eres / 2.355
if guess is None:
@@ -1005,33 +1005,32 @@ def unbinned_energy_fit(
hist,
bins,
var,
- pgd.gauss_on_step.pdf_ext,
+ pgf.extended_gauss_step_pdf,
peak,
eres,
(np.nanmin(energy), np.nanmax(energy)),
)
- c = cost.ExtendedUnbinnedNLL(energy, pgd.gauss_on_step.pdf_ext)
+ c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_gauss_step_pdf)
m = Minuit(c, *x0)
m.limits = [
- (None, None),
- (None, None),
(0, 2 * np.sum(hist)),
(peak - 1, peak + 1),
(0, None),
(0, 2 * np.sum(hist)),
(-1, 1),
+ (None, None),
+ (None, None),
+ (None, None),
]
- m.fixed[:2] = True
+ m.fixed[-3:] = True
m.simplex().migrad()
m.hesse()
- x0 = m.values[:5]
- x0 += [0.2, 0.2 * m.values[4]]
- x0 += m.values[5:]
+ x0 = m.values[:3]
+ x0 += [0.2, 0.2 * m.values[2]]
+ x0 += m.values[3:]
if verbose:
print(m)
bounds = [
- (None, None),
- (None, None),
(0, 2 * np.sum(hist)),
(peak - 1, peak + 1),
(0, None),
@@ -1039,40 +1038,44 @@ def unbinned_energy_fit(
(0, None),
(0, 2 * np.sum(hist)),
(-1, 1),
+ (None, None),
+ (None, None),
+ (None, None),
]
- fixed = [0, 1]
+ fixed = [7, 8, 9]
else:
x0 = guess
x1 = energy_guess(
hist,
bins,
var,
- pgd.hpge_peak.pdf_ext,
+ pgf.extended_radford_pdf,
peak,
eres,
(np.nanmin(energy), np.nanmax(energy)),
)
- x0[2] = x1[2]
- x0[7] = x1[7]
+ x0[0] = x1[0]
+ x0[5] = x1[5]
bounds = [
- (None, None),
- (None, None),
(0, 2 * np.sum(hist)),
- (guess[3] - 0.5, guess[3] + 0.5),
+ (guess[1] - 0.5, guess[1] + 0.5),
+ sorted((0.8 * guess[2], 1.2 * guess[2])),
+ sorted((0.8 * guess[3], 1.2 * guess[3])),
sorted((0.8 * guess[4], 1.2 * guess[4])),
- sorted((0.8 * guess[5], 1.2 * guess[5])),
- sorted((0.8 * guess[6], 1.2 * guess[6])),
(0, 2 * np.sum(hist)),
- sorted((0.8 * guess[8], 1.2 * guess[8])),
+ sorted((0.8 * guess[6], 1.2 * guess[6])),
+ (None, None),
+ (None, None),
+ (None, None),
]
- fixed = [0, 1, 3, 4, 5, 6, 8]
+ fixed = [1, 2, 3, 4, 6, 7, 8, 9]
if len(x0) == 0:
- pars, errs, cov = return_nans(pgd.hpge_peak.pdf_ext)
+ pars, errs, cov = return_nans(pgf.extended_radford_pdf)
return pars, errs
if verbose:
print(x0)
- c = cost.ExtendedUnbinnedNLL(energy, pgd.hpge_peak.pdf_ext)
+ c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_radford_pdf)
m = Minuit(c, *x0)
m.limits = bounds
for fix in fixed:
@@ -1089,20 +1092,20 @@ def unbinned_energy_fit(
plt.figure()
bcs = (bins[1:] + bins[:-1]) / 2
plt.step(bcs, hist, where="mid")
- plt.plot(bcs, pgd.hpge_peak.get_pdf(bcs, *x0) * np.diff(bcs)[0])
- plt.plot(bcs, pgd.hpge_peak.get_pdf(bcs, *m.values) * np.diff(bcs)[0])
+ plt.plot(bcs, pgf.radford_pdf(bcs, *x0) * np.diff(bcs)[0])
+ plt.plot(bcs, pgf.radford_pdf(bcs, *m.values) * np.diff(bcs)[0])
plt.show()
- if not np.isnan(m.errors[2:]).all():
+ if not np.isnan(m.errors[:-3]).all():
return m.values, m.errors
else:
try:
m.simplex().migrad()
m.minos()
- if not np.isnan(m.errors[2:]).all():
+ if not np.isnan(m.errors[:-3]).all():
return m.values, m.errors
except:
- pars, errs, cov = return_nans(pgd.hpge_peak.pdf_ext)
+ pars, errs, cov = return_nans(pgf.extended_radford_pdf)
return pars, errs
@@ -1317,7 +1320,6 @@ def __init__(
self.dt_cut = dt_cut
self.dep_acc = dep_acc
if self.dt_cut is not None:
- self.update_cal_dicts(dt_cut["cut"])
self.dt_cut_param = dt_cut["out_param"]
self.fit_selection = f"{self.selection_string} & {self.dt_cut_param}"
self.dt_cut_hard = dt_cut["hard"]
diff --git a/src/pygama/pargen/__init__.py b/src/pygama/pargen/__init__.py
index cca7a6038..3dcede5b7 100644
--- a/src/pygama/pargen/__init__.py
+++ b/src/pygama/pargen/__init__.py
@@ -1,3 +1,4 @@
"""
-Subpackage description
+Utilities to generate and optimize parameters of interest from data (e.g.
+calibration routines)
"""
diff --git a/src/pygama/pargen/cuts.py b/src/pygama/pargen/cuts.py
index c957fce8f..638199f64 100644
--- a/src/pygama/pargen/cuts.py
+++ b/src/pygama/pargen/cuts.py
@@ -9,14 +9,15 @@
import logging
import os
-import lgdo.lh5_store as lh5
+import lgdo.lh5 as lh5
import numpy as np
import pandas as pd
+from lgdo.types import Table
from scipy import stats
import pygama.math.histogram as pgh
+import pygama.math.peak_fitting as pgf
import pygama.pargen.energy_cal as pgc
-from pygama.math.binned_fitting import gauss_mode_width_max
log = logging.getLogger(__name__)
@@ -51,7 +52,7 @@ def generate_cuts(
output_dict = {}
if isinstance(data, pd.DataFrame):
pass
- elif isinstance(data, lh5.Table):
+ elif isinstance(data, Table):
data = {entry: data[entry].nda for entry in get_keys(data, parameters)}
data = pd.DataFrame.from_dict(data)
elif isinstance(data, dict):
@@ -123,7 +124,7 @@ def generate_cuts(
fwhm = pgh.get_fwhm(counts, bins)[0]
mean = float(bin_centres[np.argmax(counts)])
- pars, cov = gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
counts,
bins,
mode_guess=mean,
@@ -204,7 +205,7 @@ def get_cut_indexes(
keys = cut_dict.keys()
if isinstance(all_data, pd.DataFrame):
pass
- elif isinstance(all_data, lh5.Table):
+ elif isinstance(all_data, Table):
cut_keys = list(cut_dict)
cut_keys.append(energy_param)
all_data = {
diff --git a/src/pygama/pargen/data_cleaning.py b/src/pygama/pargen/data_cleaning.py
index 4cd573e78..4a1ceb93e 100644
--- a/src/pygama/pargen/data_cleaning.py
+++ b/src/pygama/pargen/data_cleaning.py
@@ -10,10 +10,7 @@
import numpy as np
from scipy import stats
-from pygama.math.binned_fitting import *
-from pygama.math.functions.crystal_ball import nb_crystal_ball_scaled_pdf
-from pygama.math.functions.gauss import nb_gauss
-from pygama.math.histogram import *
+from pygama.math.peak_fitting import *
def gaussian_cut(data, cut_sigma=3, plotAxis=None):
@@ -32,11 +29,11 @@ def gaussian_cut(data, cut_sigma=3, plotAxis=None):
bin_centers = bins[:-1] + (bins[1] - bins[0]) / 2
# fit gaussians to that
- # result = fit_unbinned(nb_gauss, hist, [median, width/2] )
+ # result = fit_unbinned(gauss, hist, [median, width/2] )
# print("unbinned: {}".format(result))
result = fit_binned(
- nb_gauss,
+ gauss,
hist,
bin_centers,
[median, width / 2, np.amax(hist) * (width / 2) * np.sqrt(2 * np.pi)],
@@ -47,7 +44,7 @@ def gaussian_cut(data, cut_sigma=3, plotAxis=None):
if plotAxis is not None:
plotAxis.plot(bin_centers, hist, ls="steps-mid", color="k", label="data")
- fit = nb_gauss(bin_centers, *result)
+ fit = gauss(bin_centers, *result)
plotAxis.plot(bin_centers, fit, label="gaussian fit")
plotAxis.axvline(result[0], color="g", label="fit mean")
plotAxis.axvline(cut_lo, color="r", label=f"+/- {cut_sigma} sigma")
@@ -74,19 +71,15 @@ def xtalball_cut(data, cut_sigma=3, plotFigure=None):
bin_centers = bins[:-1] + (bins[1] - bins[0]) / 2
# fit gaussians to that
- # result = fit_unbinned(nb_gauss, hist, [median, width/2] )
+ # result = fit_unbinned(gauss, hist, [median, width/2] )
# print("unbinned: {}".format(result))
p0 = get_gaussian_guess(hist, bin_centers)
bounds = [
- (p0[2] * 0.2, p0[0] * 0.5, p0[1] * 0.5, 0, 1),
- (p0[2] * 5, p0[0] * 1.5, p0[1] * 1.5, np.inf, np.inf),
+ (p0[0] * 0.5, p0[1] * 0.5, p0[2] * 0.2, 0, 1),
+ (p0[0] * 1.5, p0[1] * 1.5, p0[2] * 5, np.inf, np.inf),
]
result = fit_binned(
- nb_crystal_ball_scaled_pdf,
- hist,
- bin_centers,
- [p0[2], p0[0], p0[1], 10, 1],
- bounds=bounds,
+ xtalball, hist, bin_centers, [p0[0], p0[1], p0[2], 10, 1], bounds=bounds
)
# print("binned: {}".format(result))
cut_lo = result[0] - cut_sigma * result[1]
@@ -95,7 +88,7 @@ def xtalball_cut(data, cut_sigma=3, plotFigure=None):
if plotFigure is not None:
plt.figure(plotFigure.number)
plt.plot(bin_centers, hist, ls="steps-mid", color="k", label="data")
- fit = nb_crystal_ball_scaled_pdf(bin_centers, *result)
+ fit = xtalball(bin_centers, *result)
plt.plot(bin_centers, fit, label="xtalball fit")
plt.axvline(result[0], color="g", label="fit mean")
plt.axvline(cut_lo, color="r", label=f"+/- {cut_sigma} sigma")
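
A short sketch of the (mu, sigma, area) guess convention that `gaussian_cut` now uses. Here scipy's `curve_fit` stands in for the module's `fit_binned`, and the `gauss` parameter ordering is inferred from the guess vector above rather than taken from its docstring:

```python
import numpy as np
from scipy.optimize import curve_fit

from pygama.math.histogram import get_bin_centers, get_hist
from pygama.math.peak_fitting import gauss

rng = np.random.default_rng(0)
data = rng.normal(100, 5, 50_000)

hist, bins, _ = get_hist(data, bins=200, range=(50, 150))
centers = get_bin_centers(bins)

# initial guess in the (mu, sigma, area) ordering used by gaussian_cut above
p0 = [np.median(data), data.std(), np.amax(hist) * data.std() * np.sqrt(2 * np.pi)]
pars, _ = curve_fit(gauss, centers, hist, p0=p0)

cut_lo, cut_hi = pars[0] - 3 * pars[1], pars[0] + 3 * pars[1]
```
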
diff --git a/src/pygama/pargen/dplms_ge_dict.py b/src/pygama/pargen/dplms_ge_dict.py
new file mode 100644
index 000000000..6a155d239
--- /dev/null
+++ b/src/pygama/pargen/dplms_ge_dict.py
@@ -0,0 +1,564 @@
+"""
+This module creates the DPLMS filter dictionary for HPGe processing
+"""
+
+from __future__ import annotations
+
+import itertools
+import json
+import logging
+import os
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+from lgdo import Array, Table, lh5
+from scipy.signal import convolve, convolve2d
+
+from pygama.math.histogram import get_hist
+from pygama.math.peak_fitting import (
+ extended_gauss_step_pdf,
+ extended_radford_pdf,
+ gauss_step_pdf,
+ radford_pdf,
+)
+from pygama.pargen.cuts import generate_cuts, get_cut_indexes
+from pygama.pargen.dsp_optimize import run_one_dsp
+from pygama.pargen.energy_optimisation import fom_FWHM_with_dt_corr_fit
+
+log = logging.getLogger(__name__)
+sto = lh5.LH5Store()
+
+
+def dplms_ge_dict(
+ lh5_path: str,
+ raw_fft: Table,
+ raw_cal: Table,
+ dsp_config: dict,
+ par_dsp: dict,
+ par_dsp_lh5: str,
+ dplms_dict: dict,
+ decay_const: float = 0,
+ ene_par: str = "dplmsEmax",
+ display: int = 0,
+) -> dict:
+ """
+ This function calculates the dplms dictionary for HPGe detectors.
+
+ Parameters
+ ----------
+ lh5_path
+ Name of channel to process, should be name of lh5 group in raw files
+    raw_fft
+        table with FFT (baseline) data
+    raw_cal
+        table with calibration data
+ dsp_config
+ dsp config file
+ par_dsp
+ Dictionary with db parameters for dsp processing
+ par_dsp_lh5
+ Path for saving dplms coefficients
+ dplms_dict
+ Dictionary with various parameters
+
+ Returns
+ -------
+ out_dict
+ """
+
+ t0 = time.time()
+ log.info(f"\nSelecting baselines")
+
+ dsp_fft = run_one_dsp(raw_fft, dsp_config, db_dict=par_dsp[lh5_path])
+ cut_dict = generate_cuts(dsp_fft, parameters=dplms_dict["bls_cut_pars"])
+ idxs = get_cut_indexes(dsp_fft, cut_dict)
+ bl_field = dplms_dict["bl_field"]
+ log.info(f"... {len(dsp_fft[bl_field].values.nda[idxs,:])} baselines after cuts")
+
+ bls = dsp_fft[bl_field].values.nda[idxs, : dplms_dict["bsize"]]
+ bls_par = {}
+ bls_cut_pars = [par for par in dplms_dict["bls_cut_pars"].keys()]
+ for par in bls_cut_pars:
+ bls_par[par] = dsp_fft[par].nda
+ t1 = time.time()
+ log.info(
+ f"total events {len(raw_fft)}, {len(bls)} baseline selected in {(t1-t0):.2f} s"
+ )
+
+    log.info(
+        "\nCalculating noise matrix of length %d, "
+        "n. events %d, "
+        "size %d",
+        dplms_dict["length"],
+        bls.shape[0],
+        bls.shape[1],
+    )
+ nmat = noise_matrix(bls, dplms_dict["length"])
+ t2 = time.time()
+ log.info(f"Time to calculate noise matrix {(t2-t1):.2f} s")
+
+ log.info("\nSelecting signals")
+ wsize = dplms_dict["wsize"]
+ wf_field = dplms_dict["wf_field"]
+ peaks_keV = np.array(dplms_dict["peaks_keV"])
+ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]]
+
+ log.info(f"Produce dsp data for {len(raw_cal)} events")
+ dsp_cal = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path])
+ t3 = time.time()
+ log.info(f"Time to run dsp production {(t3-t2):.2f} s")
+
+ dsp_config["outputs"] = [ene_par, "dt_eff"]
+
+ # dictionary for peak fitting
+ peak_dict = {
+ "peak": peaks_keV[-1],
+ "kev_width": kev_widths[-1],
+ "parameter": ene_par,
+ "func": extended_gauss_step_pdf,
+ "gof_func": gauss_step_pdf,
+ }
+
+ if display > 0:
+ plot_dict = {}
+ plot_dict["dplms"] = {}
+
+ # penalized coefficients
+ dp_coeffs = dplms_dict["dp_coeffs"]
+ za_coeff = dplms_dict["dp_def"]["za"]
+ dp_coeffs.pop("za")
+ coeff_keys = [key for key in dp_coeffs.keys()]
+ lists = [dp_coeffs[key] for key in dp_coeffs.keys()]
+
+ prod = list(itertools.product(*lists))
+ grid_dict = {}
+ min_fom = float("inf")
+ min_idx = None
+
+ for i, values in enumerate(prod):
+ coeff_values = dict(zip(coeff_keys, values))
+
+        log.info(
+            "\nCase %d -> %s",
+            i,
+            ", ".join(f"{key} = {value}"
+                      for key, value in coeff_values.items()),
+        )
+ grid_dict[i] = coeff_values
+
+ sel_dict = signal_selection(dsp_cal, dplms_dict, coeff_values)
+ wfs = dsp_cal[wf_field].nda[sel_dict["idxs"], :]
+ log.info(f"... {len(wfs)} signals after signal selection")
+
+ ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const)
+
+ t_tmp = time.time()
+ nm_coeff = coeff_values["nm"]
+ ft_coeff = coeff_values["ft"]
+ x, y, refy = filter_synthesis(
+ ref,
+ nm_coeff * nmat,
+ rmat,
+ za_coeff,
+ pmat,
+ ft_coeff * fmat,
+ dplms_dict["length"],
+ wsize,
+ )
+ par_dsp[lh5_path]["dplms"] = {"length": dplms_dict["length"], "coefficients": x}
+        log.info(
+            f"Filter synthesis in {time.time()-t_tmp:.1f} s, filter area {np.sum(x):.2f}"
+        )
+
+ t_tmp = time.time()
+ dsp_opt = run_one_dsp(raw_cal, dsp_config, db_dict=par_dsp[lh5_path])
+
+ try:
+ res = fom_FWHM_with_dt_corr_fit(
+ dsp_opt,
+ peak_dict,
+ "QDrift",
+ idxs=np.where(~np.isnan(dsp_opt["dt_eff"].nda))[0],
+ )
+ except:
+ log.debug("FWHM not calculated")
+ continue
+
+ fwhm, fwhm_err, alpha, chisquare = (
+ res["fwhm"],
+ res["fwhm_err"],
+ res["alpha"],
+ res["chisquare"],
+ )
+ log.info(
+ f"FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, evaluated in {time.time()-t_tmp:.1f} s"
+ )
+
+ grid_dict[i]["fwhm"] = fwhm
+ grid_dict[i]["fwhm_err"] = fwhm_err
+ grid_dict[i]["alpha"] = alpha
+
+ if (
+ fwhm < dplms_dict["fwhm_limit"]
+ and fwhm_err < dplms_dict["err_limit"]
+ and chisquare < dplms_dict["chi_limit"]
+ ):
+ if fwhm < min_fom:
+ min_idx, min_fom = i, fwhm
+
+ if min_idx is not None:
+ min_result = grid_dict[min_idx]
+ best_case_values = {key: min_result[key] for key in min_result.keys()}
+
+ fwhm = best_case_values.get("fwhm", None)
+ fwhm_err = best_case_values.get("fwhm_err", 0)
+ alpha = best_case_values.get("alpha", 0)
+ nm_coeff = best_case_values.get("nm", dplms_dict["dp_def"]["nm"])
+        ft_coeff = best_case_values.get("ft", dplms_dict["dp_def"]["ft"])
+ rt_coeff = best_case_values.get("rt", dplms_dict["dp_def"]["rt"])
+ pt_coeff = best_case_values.get("pt", dplms_dict["dp_def"]["pt"])
+
+ if all(
+ v is not None
+ for v in [
+ fwhm,
+ fwhm_err,
+ alpha,
+ nm_coeff,
+ ft_coeff,
+ rt_coeff,
+ pt_coeff,
+ ]
+ ):
+ log.info(
+ f"\nBest case: FWHM = {fwhm:.2f} ± {fwhm_err:.2f} keV, ctc {alpha}"
+ )
+ else:
+ log.error("Some values are missing in the best case results")
+    else:
+        log.error("Filter synthesis failed")
+        best_case_values, alpha = {}, 0
+        nm_coeff, ft_coeff = dplms_dict["dp_def"]["nm"], dplms_dict["dp_def"]["ft"]
+        rt_coeff = dplms_dict["dp_def"]["rt"]
+        pt_coeff = dplms_dict["dp_def"]["pt"]
+
+ # filter synthesis
+ sel_dict = signal_selection(dsp_cal, dplms_dict, best_case_values)
+ idxs = sel_dict["idxs"]
+ wfs = dsp_cal[wf_field].nda[idxs, :]
+ ref, rmat, pmat, fmat = signal_matrices(wfs, dplms_dict["length"], decay_const)
+
+ x, y, refy = filter_synthesis(
+ ref,
+ nm_coeff * nmat,
+ rmat,
+ za_coeff,
+ pmat,
+ ft_coeff * fmat,
+ dplms_dict["length"],
+ wsize,
+ )
+
+ sto.write(
+ Array(x),
+ name="dplms",
+ lh5_file=par_dsp_lh5,
+ wo_mode="overwrite",
+ group=lh5_path,
+ )
+
+ out_dict = {
+ "dplms": {
+ "length": dplms_dict["length"],
+ "coefficients": f"loadlh5('{par_dsp_lh5}', '{lh5_path}/dplms')",
+ "dp_coeffs": {
+ "nm": nm_coeff,
+ "za": za_coeff,
+ "ft": ft_coeff,
+ "rt": rt_coeff,
+ "pt": pt_coeff,
+ },
+ }
+ }
+ out_alpha_dict = {
+ f"{ene_par}_ctc": {
+ "expression": f"{ene_par}*(1+dt_eff*a)",
+ "parameters": {"a": round(alpha, 9)},
+ }
+ }
+ out_dict.update({"ctc_params": out_alpha_dict})
+
+ log.info(f"Time to complete DPLMS filter synthesis {time.time()-t0:.1f}")
+
+ if display > 0:
+ plot_dict["dplms"]["ref"] = ref
+ plot_dict["dplms"]["coefficients"] = x
+
+ bl_idxs = np.random.choice(len(bls), dplms_dict["n_plot"])
+ bls = bls[bl_idxs]
+ fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white")
+ for ii, wf in enumerate(bls):
+ if ii < 10:
+ ax.plot(wf, label=f"mean = {wf.mean():.1f}")
+ else:
+ ax.plot(wf)
+ ax.legend(title=f"{lh5_path}", loc="upper right")
+ plot_dict["dplms"]["bls"] = fig
+ fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white")
+ for ii, par in enumerate(bls_cut_pars):
+ mean = cut_dict[par]["Mean Value"]
+ llo, lup = cut_dict[par]["Lower Boundary"], cut_dict[par]["Upper Boundary"]
+ plo, pup = mean - 2 * (mean - llo), mean + 2 * (lup - mean)
+ hh, bb = np.histogram(bls_par[par], bins=np.linspace(plo, pup, 200))
+ ax.flat[ii].plot(bb[1:], hh, ds="steps", label=f"cut on {par}")
+ ax.flat[ii].axvline(lup, color="k", linestyle=":", label="selection")
+ ax.flat[ii].axvline(llo, color="k", linestyle=":")
+ ax.flat[ii].set_xlabel(par)
+ ax.flat[ii].set_yscale("log")
+ ax.flat[ii].legend(title=f"{lh5_path}", loc="upper right")
+ plot_dict["dplms"]["bl_sel"] = fig
+
+ wf_idxs = np.random.choice(len(wfs), dplms_dict["n_plot"])
+ wfs = wfs[wf_idxs]
+ peak_pos = dsp_cal["peak_pos"].nda
+ peak_pos_neg = dsp_cal["peak_pos_neg"].nda
+ centroid = dsp_cal["centroid"].nda
+ risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda
+ rt_low = dplms_dict["rt_low"]
+ rt_high = dplms_dict["rt_high"]
+ peak_lim = dplms_dict["peak_lim"]
+ cal_par = {}
+ wfs_cut_pars = [par for par in dplms_dict["wfs_cut_pars"].keys()]
+ for par in wfs_cut_pars:
+ cal_par[par] = dsp_cal[par].nda
+ fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white")
+ for ii, wf in enumerate(wfs):
+ if ii < 10:
+ ax.plot(wf, label=f"centr = {centroid[ii]}")
+ else:
+ ax.plot(wf)
+ ax.legend(title=f"{lh5_path}", loc="upper right")
+ axin = ax.inset_axes([0.1, 0.15, 0.35, 0.5])
+ for wf in wfs:
+ axin.plot(wf)
+ axin.set_xlim(wsize / 2 - dplms_dict["zoom"], wsize / 2 + dplms_dict["zoom"])
+ axin.set_yticklabels("")
+ plot_dict["dplms"]["wfs"] = fig
+ fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 9), facecolor="white")
+ wfs_cut_pars.append("centroid")
+ wfs_cut_pars.append("peak_pos")
+ wfs_cut_pars.append("risetime")
+ for ii, par in enumerate(wfs_cut_pars):
+ pspace = np.linspace(
+ wsize / 2 - peak_lim, wsize / 2 + peak_lim, 2 * peak_lim
+ )
+ if par == "centroid":
+ llo, lup = sel_dict["ct_ll"], sel_dict["ct_hh"]
+ hh, bb = np.histogram(centroid, bins=pspace)
+ elif par == "peak_pos":
+ llo, lup = sel_dict["pp_ll"], sel_dict["pp_hh"]
+ hh, bb = np.histogram(peak_pos, bins=pspace)
+ elif par == "risetime":
+ llo, lup = sel_dict["rt_ll"], sel_dict["rt_hh"]
+ rt_bins = int((rt_high - rt_low) / dplms_dict["period"])
+ rt_space = np.linspace(rt_low, rt_high, rt_bins)
+ hh, bb = np.histogram(risetime, bins=rt_space)
+ else:
+ llo, lup = np.min(cal_par[par]), np.max(cal_par[par])
+ hh, bb = np.histogram(cal_par[par], bins=np.linspace(llo, lup, 200))
+ ax.flat[ii + 1].plot(bb[1:], hh, ds="steps", label=f"cut on {par}")
+ ax.flat[ii + 1].axvline(
+ llo, color="k", linestyle=":", label=f"sel. {llo:.1f} {lup:.1f}"
+ )
+ if par != "centroid":
+ ax.flat[ii + 1].axvline(lup, color="k", linestyle=":")
+ ax.flat[ii + 1].set_xlabel(par)
+ ax.flat[ii + 1].set_yscale("log")
+ ax.flat[ii + 1].legend(title=f"{lh5_path}", loc="upper right")
+ roughenergy = dsp_cal["trapTmax"].nda
+ roughenergy_sel = roughenergy[idxs]
+ ell, ehh = roughenergy.min(), roughenergy.max()
+ he, be = np.histogram(roughenergy, bins=np.linspace(ell, ehh, 1000))
+ hs, be = np.histogram(roughenergy_sel, bins=np.linspace(ell, ehh, 1000))
+ ax.flat[0].plot(be[1:], he, c="b", ds="steps", label="initial")
+ ax.flat[0].plot(be[1:], hs, c="r", ds="steps", label="selected")
+ ax.flat[0].set_xlabel("rough energy (ADC)")
+ ax.flat[0].set_yscale("log")
+ ax.flat[0].legend(loc="upper right", title=f"{lh5_path}")
+ plot_dict["dplms"]["wf_sel"] = fig
+
+ fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white")
+ ax.plot(x, "r-", label=f"filter")
+ ax.axhline(0, color="black", linestyle=":")
+ ax.legend(loc="upper right", title=f"{lh5_path}")
+ axin = ax.inset_axes([0.6, 0.1, 0.35, 0.33])
+ axin.plot(x, "r-")
+ axin.set_xlim(
+ dplms_dict["length"] / 2 - dplms_dict["zoom"],
+ dplms_dict["length"] / 2 + dplms_dict["zoom"],
+ )
+ axin.set_yticklabels("")
+ ax.indicate_inset_zoom(axin)
+
+ return out_dict, plot_dict
+ else:
+ return out_dict
+
+
+def is_valid_centroid(
+ centroid: np.array, lim: int, size: int, full_size: int
+) -> list[bool]:
+ llim = size / 2 - lim
+ hlim = full_size - size / 2
+ idxs = (centroid > llim) & (centroid < hlim)
+ return idxs, llim, hlim
+
+
+def is_not_pile_up(
+ peak_pos: np.array, peak_pos_neg: np.array, thr: int, lim: int, size: int
+) -> list[bool]:
+ bin_edges = np.linspace(size / 2 - lim, size / 2 + lim, 2 * lim)
+ hist, bin_edges = np.histogram(peak_pos, bins=bin_edges)
+
+ thr = thr * hist.max() / 100
+ low_thr_idxs = np.where(hist[: hist.argmax()] < thr)[0]
+ upp_thr_idxs = np.where(hist[hist.argmax() :] < thr)[0]
+
+ idx_low = low_thr_idxs[-1] if low_thr_idxs.size > 0 else 0
+ idx_upp = (
+ upp_thr_idxs[0] + hist.argmax() if upp_thr_idxs.size > 0 else len(hist) - 1
+ )
+
+ llow, lupp = bin_edges[idx_low], bin_edges[idx_upp]
+
+ idxs = []
+ for n, nn in zip(peak_pos, peak_pos_neg):
+ condition1 = np.count_nonzero(n > 0) == 1
+ condition2 = (
+ np.count_nonzero((n > 0) & ((n < llow) | (n > lupp) & (n < size))) == 0
+ )
+ condition3 = np.count_nonzero(nn > 0) == 0
+ idxs.append(condition1 and condition2 and condition3)
+ return idxs, llow, lupp
+
+
+def is_valid_risetime(risetime: np.array, llim: int, perc: float):
+ hlim = np.percentile(risetime[~np.isnan(risetime)], perc)
+ idxs = (risetime >= llim) & (risetime <= hlim)
+ return idxs, llim, hlim
+
+
+def signal_selection(dsp_cal, dplms_dict, coeff_values):
+ peak_pos = dsp_cal["peak_pos"].nda
+ peak_pos_neg = dsp_cal["peak_pos_neg"].nda
+ centroid = dsp_cal["centroid"].nda
+ risetime = dsp_cal["tp_90"].nda - dsp_cal["tp_10"].nda
+
+ rt_low = dplms_dict["rt_low"]
+ rt_high = dplms_dict["rt_high"]
+ peak_lim = dplms_dict["peak_lim"]
+ wsize = dplms_dict["wsize"]
+ bsize = dplms_dict["bsize"]
+
+ centroid_lim = dplms_dict["centroid_lim"]
+ if "rt" in coeff_values:
+ perc = coeff_values["rt"]
+ else:
+ perc = dplms_dict["dp_def"]["rt"]
+ if "pt" in coeff_values:
+ thr = coeff_values["pt"]
+ else:
+        thr = dplms_dict["dp_def"]["pt"]
+
+ idxs_ct, ct_ll, ct_hh = is_valid_centroid(centroid, centroid_lim, wsize, bsize)
+ log.info(f"... {len(peak_pos[idxs_ct,:])} signals after alignment")
+
+ idxs_pp, pp_ll, pp_hh = is_not_pile_up(peak_pos, peak_pos_neg, thr, peak_lim, wsize)
+ log.info(f"... {len(peak_pos[idxs_pp,:])} signals after pile-up cut")
+
+ idxs_rt, rt_ll, rt_hh = is_valid_risetime(risetime, rt_low, perc)
+ log.info(f"... {len(peak_pos[idxs_rt,:])} signals after risetime cut")
+
+ idxs = idxs_ct & idxs_pp & idxs_rt
+ sel_dict = {
+ "idxs": idxs,
+ "ct_ll": ct_ll,
+ "ct_hh": ct_hh,
+ "pp_ll": pp_ll,
+ "pp_hh": pp_hh,
+ "rt_ll": rt_ll,
+ "rt_hh": rt_hh,
+ }
+ return sel_dict
+
+
+def noise_matrix(bls: np.array, length: int) -> np.array:
+ nev, size = bls.shape
+ ref = np.mean(bls, axis=0)
+ offset = np.mean(ref)
+ bls = bls - offset
+ nmat = np.matmul(bls.T, bls, dtype=float) / nev
+ kernel = np.identity(size - length + 1)
+ nmat = convolve2d(nmat, kernel, boundary="symm", mode="valid") / (size - length + 1)
+ return nmat
+
+
+def signal_matrices(
+ wfs: np.array, length: int, decay_const: float, ff: int = 2
+) -> np.array:
+ nev, size = wfs.shape
+ lo = size // 2 - 100
+ flo = size // 2 - length // 2
+ fhi = size // 2 + length // 2
+ offsets = np.mean(wfs[:, :lo], axis=1)
+ wfs = wfs - offsets[:, np.newaxis]
+
+ # Reference signal
+ ref = np.sum(wfs, axis=0)
+ ref /= np.max(ref)
+ rmat = np.outer(ref[flo:fhi], ref[flo:fhi])
+
+ # Pile-up matrix
+ if decay_const > 0:
+ decay = np.exp(-np.arange(length) / decay_const)
+ else:
+ decay = np.zeros(length)
+ pmat = np.outer(decay, decay)
+
+ # Flat top matrix
+ flo -= ff // 2
+ fhi += ff // 2
+ wfs = wfs[:, flo:fhi]
+ fmat = np.matmul(wfs.T, wfs, dtype=float) / nev
+ m1 = ((1, -1), (-1, 1))
+ fmat = convolve2d(fmat, m1, boundary="symm", mode="valid")
+ if ff > 0:
+ fmat = convolve2d(fmat, np.identity(ff), boundary="symm", mode="valid") / ff
+ return ref, rmat, pmat, fmat
+
+
+def filter_synthesis(
+ ref: np.array,
+ nmat: np.array,
+ rmat: np.array,
+ za: int,
+ pmat: np.array,
+ fmat: np.array,
+ length: int,
+ size: int,
+ flip: bool = True,
+) -> np.array:
+ mat = nmat + rmat + za * np.ones([length, length]) + pmat + fmat
+ flo = (size // 2) - (length // 2)
+ fhi = (size // 2) + (length // 2)
+ x = np.linalg.solve(mat, ref[flo:fhi]).astype(np.float32)
+ y = convolve(ref, np.flip(x), mode="valid")
+ maxy = np.max(y)
+ x /= maxy
+ y /= maxy
+ refy = ref[(size // 2) - (len(y) // 2) : (size // 2) + (len(y) // 2)]
+ if flip:
+ return np.flip(x), y, refy
+ else:
+ return x, y, refy
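
A minimal sketch of how the new `dplms_ge_dict` entry point might be called. The channel name, file names and JSON config files are placeholders, and the DPLMS config must supply the keys read above (`bl_field`, `wf_field`, `length`, `wsize`, `dp_coeffs`, `dp_def`, the cut and selection settings, etc.):

```python
import json

from lgdo import lh5

from pygama.pargen.dplms_ge_dict import dplms_ge_dict

sto = lh5.LH5Store()
chan = "ch1027200"  # hypothetical channel / lh5 group name

# forced-trigger (noise) and calibration-source waveform tables
raw_fft, _ = sto.read(f"{chan}/raw", "noise_run.lh5")
raw_cal, _ = sto.read(f"{chan}/raw", "calibration_run.lh5")

with open("dsp_config.json") as f:
    dsp_config = json.load(f)
with open("par_dsp.json") as f:
    par_dsp = json.load(f)  # existing DSP parameters, keyed by channel
with open("dplms_config.json") as f:
    dplms_dict = json.load(f)  # length, wsize, dp_coeffs, dp_def, cut settings, ...

out_dict = dplms_ge_dict(
    chan,
    raw_fft,
    raw_cal,
    dsp_config,
    par_dsp,
    "par_dsp.lh5",  # output file for the filter coefficients
    dplms_dict,
)
```

With `display > 0` the function additionally returns a dictionary of diagnostic figures.
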
diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py
index 6fa2727c2..e3526c63a 100644
--- a/src/pygama/pargen/ecal_th.py
+++ b/src/pygama/pargen/ecal_th.py
@@ -15,7 +15,7 @@
from scipy.stats import binned_statistic
mpl.use("agg")
-import lgdo.lh5_store as lh5
+import lgdo.lh5 as lh5
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@@ -25,9 +25,8 @@
from matplotlib.colors import LogNorm
from scipy.optimize import curve_fit
-import pygama.math.binned_fitting as pgbf
-import pygama.math.distributions as pgd
import pygama.math.histogram as pgh
+import pygama.math.peak_fitting as pgf
import pygama.pargen.cuts as cts
import pygama.pargen.energy_cal as cal
from pygama.pargen.utils import load_data, return_nans
@@ -143,24 +142,24 @@ class calibrate_parameter:
(60, 60),
] # side bands width
funcs = [
- # pgd.gauss_on_step.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.hpge_peak.pdf_ext,
+ # pgf.extended_gauss_step_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_radford_pdf,
]
gof_funcs = [
- # pgd.gauss_on_step.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
+ # pgf.gauss_step_pdf,
+ pgf.radford_pdf,
+ pgf.radford_pdf,
+ pgf.radford_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.radford_pdf,
]
def __init__(
@@ -192,9 +191,6 @@ def __init__(
self.simplex = simplex
self.tail_weight = tail_weight
- self.output_dict = {}
- self.hit_dict = {}
-
def fit_energy_res(self):
fitted_peaks = self.results["fitted_keV"]
fwhms = self.results["pk_fwhms"][:, 0]
@@ -216,7 +212,7 @@ def fit_energy_res(self):
indexes.append(i)
continue
elif peak == 511.0:
- log.info(f"e annhilation found at index {i}")
+ log.info(f"e annihilation found at index {i}")
indexes.append(i)
continue
elif np.isnan(dfwhms[i]):
@@ -492,10 +488,10 @@ def calibrate_parameter(self, data):
for i, peak in enumerate(self.results["got_peaks_keV"]):
idx = np.where(peak == self.glines)[0][0]
self.funcs[idx] = fitted_funcs[i]
- if fitted_funcs[i] == pgd.hpge_peak.pdf_ext:
- self.gof_funcs[idx] = pgd.hpge_peak.get_pdf
+ if fitted_funcs[i] == pgf.extended_radford_pdf:
+ self.gof_funcs[idx] = pgf.radford_pdf
else:
- self.gof_funcs[idx] = pgd.gauss_on_step.get_pdf
+ self.gof_funcs[idx] = pgf.gauss_step_pdf
except:
found_peaks = np.array([])
fitted_peaks = np.array([])
@@ -542,10 +538,10 @@ def calibrate_parameter(self, data):
for i, peak in enumerate(self.results["got_peaks_keV"]):
idx = np.where(peak == self.glines)[0][0]
self.funcs[idx] = fitted_funcs[i]
- if fitted_funcs[i] == pgd.hpge_peak.pdf_ext:
- self.gof_funcs[idx] = pgd.hpge_peak.get_pdf
+ if fitted_funcs[i] == pgf.extended_radford_pdf:
+ self.gof_funcs[idx] = pgf.radford_pdf
else:
- self.gof_funcs[idx] = pgd.gauss_on_step.get_pdf
+ self.gof_funcs[idx] = pgf.gauss_step_pdf
if self.pars is None:
raise ValueError
@@ -559,10 +555,8 @@ def calibrate_parameter(self, data):
log.info(f"Calibration pars are {self.pars}")
if ~np.isnan(self.pars).all():
self.fit_energy_res()
- self.hit_dict[self.cal_energy_param] = self.gen_pars_dict()
- data[f"{self.energy_param}_cal"] = pgd.nb_poly(
- data[self.energy_param], self.pars
- )
+ self.hit_dict = {self.cal_energy_param: self.gen_pars_dict()}
+ data[self.cal_energy_param] = pgf.poly(data[self.energy_param], self.pars)
def fill_plot_dict(self, data, plot_dict={}):
for key, item in self.plot_options.items():
@@ -632,43 +626,42 @@ class high_stats_fitting(calibrate_parameter):
0.2,
]
funcs = [
- pgd.gauss_on_step.pdf_ext, # probably should be gauss on exp
- pgd.gauss_on_step.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.hpge_peak.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
- pgd.gauss_on_step.pdf_ext,
+ pgf.extended_gauss_step_pdf, # probably should be gauss on exp
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_radford_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
+ pgf.extended_gauss_step_pdf,
]
gof_funcs = [
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.hpge_peak.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
- pgd.gauss_on_step.get_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.radford_pdf,
+ pgf.radford_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.radford_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.radford_pdf,
+ pgf.radford_pdf,
+ pgf.radford_pdf,
+ pgf.radford_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
+ pgf.gauss_step_pdf,
]
def __init__(
@@ -680,9 +673,15 @@ def __init__(
plot_options={},
simplex=False,
tail_weight=20,
+ cal_energy_param=None,
+ deg=2,
+ fixed=None,
):
self.energy_param = energy_param
- self.cal_energy_param = energy_param
+ if cal_energy_param is None:
+ self.cal_energy_param = energy_param
+ else:
+ self.cal_energy_param = cal_energy_param
self.selection_string = selection_string
self.threshold = threshold
self.p_val = p_val
@@ -694,6 +693,8 @@ def __init__(
self.output_dict = {}
self.pars = [1, 0]
self.tail_weight = tail_weight
+ self.fixed = fixed
+ self.deg = deg
def get_results_dict(self, data):
if self.results:
@@ -736,74 +737,101 @@ def get_results_dict(self, data):
else:
return {}
- def fit_peaks(self, data):
- log.debug(f"Fitting {self.energy_param}")
- try:
- n_bins = [
- int((self.range_keV[i][1] + self.range_keV[i][0]) / self.binning[i])
- for i in range(len(self.glines))
- ]
- (
- pk_pars,
- pk_errors,
- pk_covs,
- pk_binws,
- pk_ranges,
- pk_pvals,
- valid_pks,
- pk_funcs,
- ) = cal.hpge_fit_E_peaks(
- data.query(self.selection_string)[self.energy_param],
- self.glines,
- self.range_keV,
- n_bins=n_bins,
- funcs=self.funcs,
- method="unbinned",
- gof_funcs=self.gof_funcs,
- n_events=None,
- allowed_p_val=self.p_val,
- tail_weight=20,
- )
- for idx, peak in enumerate(self.glines):
- self.funcs[idx] = pk_funcs[idx]
- if pk_funcs[idx] == pgd.hpge_peak.pdf_ext:
- self.gof_funcs[idx] = pgd.hpge_peak.get_pdf
- else:
- self.gof_funcs[idx] = pgd.gauss_on_step.get_pdf
+ def run_fit(self, data):
+ hist, bins, var = pgh.get_hist(
+ data.query(self.selection_string)[self.energy_param],
+ range=(np.amin(self.glines) * 0.8, np.amax(self.glines) * 1.1),
+ dx=0.5,
+ )
+ (got_peak_locations, got_peak_energies, roughpars) = cal.hpge_get_E_peaks(
+ hist, bins, var, np.array([1, 0]), n_sigma=3, peaks_keV=self.glines
+ )
- self.results["got_peaks_keV"] = self.glines
- self.results["pk_pars"] = pk_pars
- self.results["pk_errors"] = pk_errors
- self.results["pk_covs"] = pk_covs
- self.results["pk_binws"] = pk_binws
- self.results["pk_ranges"] = pk_ranges
- self.results["pk_pvals"] = pk_pvals
+ found_mask = np.in1d(self.glines, got_peak_energies)
+ self.results["got_peaks_locs"] = got_peak_locations
+ self.results["got_peaks_keV"] = got_peak_energies
- for i, pk in enumerate(self.results["got_peaks_keV"]):
- try:
- if self.results["pk_pars"][i]["n_sig"] < 10:
- valid_pks[i] = False
- elif (
- 2 * self.results["pk_errors"][i]["n_sig"]
- > self.results["pk_pars"][i]["n_sig"]
- ):
- valid_pks[i] = False
- except:
- pass
+ log.info(f"{len(got_peak_energies)} peaks obtained:")
+ log.info(f"\t Energy | Position ")
+ for i, (Li, Ei) in enumerate(zip(got_peak_locations, got_peak_energies)):
+ log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5))
- self.results["pk_validities"] = valid_pks
+ self.glines = np.array(self.glines)[found_mask].tolist()
+ self.range_keV = np.array(self.range_keV)[found_mask].tolist()
+ self.binning = np.array(self.binning)[found_mask].tolist()
+ self.funcs = np.array(self.funcs)[found_mask].tolist()
+ self.gof_funcs = np.array(self.gof_funcs)[found_mask].tolist()
- # Drop failed fits
- fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[
+ n_bins = [
+ int((self.range_keV[i][1] + self.range_keV[i][0]) / self.binning[i])
+ for i in range(len(self.glines))
+ ]
+ (
+ pk_pars,
+ pk_errors,
+ pk_covs,
+ pk_binws,
+ pk_ranges,
+ pk_pvals,
+ valid_pks,
+ pk_funcs,
+ ) = cal.hpge_fit_E_peaks(
+ data.query(self.selection_string)[self.energy_param],
+ self.glines,
+ self.range_keV,
+ n_bins=n_bins,
+ funcs=self.funcs,
+ method="unbinned",
+ gof_funcs=self.gof_funcs,
+ n_events=None,
+ allowed_p_val=self.p_val,
+ tail_weight=20,
+ )
+ for idx, peak in enumerate(self.glines):
+ self.funcs[idx] = pk_funcs[idx]
+ if pk_funcs[idx] == pgf.extended_radford_pdf:
+ self.gof_funcs[idx] = pgf.radford_pdf
+ else:
+ self.gof_funcs[idx] = pgf.gauss_step_pdf
+
+ self.results["got_peaks_keV"] = self.glines
+ self.results["pk_pars"] = pk_pars
+ self.results["pk_errors"] = pk_errors
+ self.results["pk_covs"] = pk_covs
+ self.results["pk_binws"] = pk_binws
+ self.results["pk_ranges"] = pk_ranges
+ self.results["pk_pvals"] = pk_pvals
+
+ for i, pk in enumerate(self.results["got_peaks_keV"]):
+ try:
+ if self.results["pk_pars"][i]["n_sig"] < 10:
+ valid_pks[i] = False
+ elif (
+ 2 * self.results["pk_errors"][i]["n_sig"]
+ > self.results["pk_pars"][i]["n_sig"]
+ ):
+ valid_pks[i] = False
+ except:
+ pass
+
+ self.results["pk_validities"] = valid_pks
+
+ # Drop failed fits
+ self.results["fitted_keV"] = np.asarray(self.glines)[valid_pks]
+
+ def fit_peaks(self, data):
+ log.debug(f"Fitting {self.energy_param}")
+ try:
+ self.run_fit(data)
+
+ valid_pks = self.results["pk_validities"]
+ fitted_peaks_keV = self.results["fitted_keV"]
+ pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[
valid_pks
- ]
- pk_pars = np.asarray(pk_pars, dtype=object)[valid_pks] # ragged
- pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks]
- pk_covs = np.asarray(pk_covs, dtype=object)[valid_pks]
- pk_binws = np.asarray(pk_binws)[valid_pks]
- pk_ranges = np.asarray(pk_ranges)[valid_pks]
- pk_pvals = np.asarray(pk_pvals)[valid_pks]
- pk_funcs = np.asarray(pk_funcs)[valid_pks]
+ ] # ragged
+ pk_errors = np.asarray(self.results["pk_errors"], dtype=object)[valid_pks]
+ pk_covs = np.asarray(self.results["pk_covs"], dtype=object)[valid_pks]
+ pk_funcs = np.asarray(self.funcs)[valid_pks]
log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:")
for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate(
@@ -813,16 +841,14 @@ def fit_peaks(self, data):
parsi = np.asarray(parsi, dtype=float)
errorsi = np.asarray(errorsi, dtype=float)
covsi = np.asarray(covsi, dtype=float)
- # parsigsi = np.sqrt(covsi.diagonal())
+
log.info(f"\tEnergy: {str(Ei)}")
log.info(f"\t\tParameter | Value +/- Sigma ")
for vari, pari, errorsi in zip(varnames, parsi, errorsi):
- log.info(
- f'\t\t{str(vari).ljust(10)} | {("%4.2f" % pari).rjust(8)} +/- {("%4.2f" % errorsi).ljust(8)}'
- )
+ log.info(f"\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}")
cal_fwhms = [
- func_i.get_fwhm(pars_i, cov=covs_i)
+ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i)
for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs)
]
@@ -834,14 +860,12 @@ def fit_peaks(self, data):
)
log.info(f"{len(cal_fwhms)} FWHMs found:")
- log.info(f"\t Energy | FWHM ")
+ log.info(f"\t{'Energy':>10}{'| FWHM':>9}")
for i, (Ei, fwhm, fwhme) in enumerate(
zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs)
):
log.info(
- f"\t{i}".ljust(4)
- + str(Ei).ljust(9)
- + f"| {fwhm:.2f}+-{fwhme:.2f} keV".ljust(5)
+ f"\t{str(i):<4}{str(Ei):<9}| {f'{fwhm:.2f}+-{fwhme:.2f}':<10} keV"
)
self.fit_energy_res()
log.debug(f"high stats fitting successful")
@@ -849,6 +873,85 @@ def fit_peaks(self, data):
self.results = {}
log.debug(f"high stats fitting failed")
+ def update_calibration(self, data):
+ log.debug(f"Calibrating {self.energy_param}")
+ self.run_fit(data)
+
+ valid_pks = self.results["pk_validities"]
+ fitted_peaks_keV = self.results["fitted_keV"]
+ pk_pars = np.asarray(self.results["pk_pars"], dtype=object)[valid_pks] # ragged
+ pk_errors = np.asarray(self.results["pk_errors"], dtype=object)[valid_pks]
+ pk_covs = np.asarray(self.results["pk_covs"], dtype=object)[valid_pks]
+ pk_funcs = np.asarray(self.funcs)[valid_pks]
+
+ log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:")
+ for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate(
+ zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs, pk_funcs)
+ ):
+ varnames = func_i.__code__.co_varnames[1 : len(pk_pars[-1]) + 1]
+ parsi = np.asarray(parsi, dtype=float)
+ errorsi = np.asarray(errorsi, dtype=float)
+ covsi = np.asarray(covsi, dtype=float)
+ # parsigsi = np.sqrt(covsi.diagonal())
+ log.info(f"\tEnergy: {str(Ei)}")
+ log.info(f"\t\tParameter | Value +/- Sigma ")
+ for vari, pari, errorsi in zip(varnames, parsi, errorsi):
+ log.info(f"\t\t{str(vari):<12} | {pari: 8.2f} +/- {errorsi:.2f}")
+ # Drop failed fits
+
+ mus = [
+ pgf.get_mu_func(func_i, pars_i, errors=errors_i)
+ for func_i, pars_i, errors_i in zip(pk_funcs, pk_pars, pk_errors)
+ ]
+ mus, mu_vars = zip(*mus)
+ mus = np.asarray(mus)
+ mu_errs = np.asarray(mu_vars)
+ mu_vars = np.asarray(mu_vars) ** 2
+
+ try:
+ pars, errs, cov = cal.hpge_fit_E_scale(
+ mus, mu_vars, fitted_peaks_keV, deg=self.deg, fixed=self.fixed
+ )
+ except ValueError:
+ log.error("Failed to fit enough peaks to get accurate calibration")
+            return
+
+ # Invert the E scale fit to get a calibration function
+ self.pars, self.errs, self.cov = cal.hpge_fit_E_cal_func(
+ mus, mu_vars, fitted_peaks_keV, pars, deg=self.deg, fixed=self.fixed
+ )
+
+ uncal_fwhms = [
+ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i)
+ for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs)
+ ]
+ uncal_fwhms, uncal_fwhm_errs = zip(*uncal_fwhms)
+ uncal_fwhms = np.asarray(uncal_fwhms)
+ uncal_fwhm_errs = np.asarray(uncal_fwhm_errs)
+ derco = np.polyder(np.poly1d(pars)).coefficients
+ der = [pgf.poly(Ei, derco) for Ei in fitted_peaks_keV]
+
+ cal_fwhms = uncal_fwhms * der
+ cal_fwhms_errs = uncal_fwhm_errs * der
+ self.results["pk_fwhms"] = np.asarray(
+ [(u * d, e * d) for u, e, d in zip(uncal_fwhms, uncal_fwhm_errs, der)]
+ )
+
+ log.info(f"{len(cal_fwhms)} FWHMs found:")
+ log.info(f"\t{'Energy':>10}{'| FWHM':>9}")
+ for i, (Ei, fwhm, fwhme) in enumerate(
+ zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs)
+ ):
+ log.info(f"\t{str(i):<4}{str(Ei):<9}| {f'{fwhm:.2f}+-{fwhme:.2f}':<10} keV")
+ self.fit_energy_res()
+ if self.cal_energy_param == self.energy_param:
+ log.info(
+ "Warning dataframe energy will be overwritten as cal energy and input energy have same name"
+ )
+ self.hit_dict = {self.cal_energy_param: self.gen_pars_dict()}
+ data[self.cal_energy_param] = pgf.poly(data[self.energy_param], self.pars)
+ log.debug(f"high stats calibration successful")
+
def get_peak_labels(
labels: list[str], pars: list[float]
@@ -859,7 +962,7 @@ def get_peak_labels(
if i % 2 == 1:
continue
else:
- out.append(f"{pgd.nb_poly(label, pars):.1f}")
+ out.append(f"{pgf.poly(label, pars):.1f}")
out_labels.append(label)
return out_labels, out
@@ -900,13 +1003,13 @@ def plot_fits(
fitted_gof_funcs.append(ecal_class.gof_funcs[i])
mus = [
- func_i.get_mu(pars_i) if pars_i is not None else np.nan
+ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan
for func_i, pars_i in zip(fitted_gof_funcs, pk_pars)
]
fig = plt.figure()
derco = np.polyder(np.poly1d(ecal_class.pars)).coefficients
- der = [pgd.nb_poly(5, derco) for Ei in fitted_peaks]
+ der = [pgf.poly(5, derco) for Ei in fitted_peaks]
for i, peak in enumerate(mus):
range_adu = 5 / der[i]
plt.subplot(nrows, ncols, i + 1)
@@ -1142,22 +1245,26 @@ def bin_stability(ecal_class, data, time_slice=180, energy_range=[2585, 2660]):
def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 2700]):
- pk_pars = ecal_class.results["pk_pars"]
+ valid_fits = ecal_class.results["pk_validities"]
+ pk_pars = ecal_class.results["pk_pars"][valid_fits]
+ pk_errs = ecal_class.results["pk_errors"][valid_fits]
fitted_peaks = ecal_class.results["got_peaks_keV"]
- pk_errs = ecal_class.results["pk_errors"]
fitted_gof_funcs = []
for i, peak in enumerate(ecal_class.glines):
if peak in fitted_peaks:
fitted_gof_funcs.append(ecal_class.gof_funcs[i])
+ fitted_gof_funcs = np.array(fitted_gof_funcs)[valid_fits]
+ fitted_peaks = np.array(fitted_peaks)[valid_fits]
+
mus = [
- func_i.get_mu(pars_i) if pars_i is not None else np.nan
+ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan
for func_i, pars_i in zip(fitted_gof_funcs, pk_pars)
]
mu_errs = [
- func_i.get_mu(pars_i) if pars_i is not None else np.nan
+ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan
for func_i, pars_i in zip(fitted_gof_funcs, pk_errs)
]
@@ -1172,16 +1279,16 @@ def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 27
ax1.scatter(fitted_peaks, mus, marker="x", c="b")
- ax1.plot(pgd.nb_poly(cal_bins, ecal_class.pars), cal_bins, lw=1, c="g")
+ ax1.plot(pgf.poly(cal_bins, ecal_class.pars), cal_bins, lw=1, c="g")
ax1.grid()
ax1.set_xlim([erange[0], erange[1]])
ax1.set_ylabel("Energy (ADC)")
ax2.errorbar(
fitted_peaks,
- pgd.nb_poly(np.array(mus), ecal_class.pars) - fitted_peaks,
- yerr=pgd.nb_poly(np.array(mus) + np.array(mu_errs), ecal_class.pars)
- - pgd.nb_poly(np.array(mus), ecal_class.pars),
+ pgf.poly(np.array(mus), ecal_class.pars) - fitted_peaks,
+ yerr=pgf.poly(np.array(mus) + np.array(mu_errs), ecal_class.pars)
+ - pgf.poly(np.array(mus), ecal_class.pars),
linestyle=" ",
marker="x",
c="b",
@@ -1219,7 +1326,7 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz
indexes.append(i)
continue
elif peak == 511.0:
- log.info(f"e annhilation found at index {i}")
+ log.info(f"e annihilation found at index {i}")
indexes.append(i)
continue
else:
@@ -1230,115 +1337,124 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz
fig, (ax1, ax2) = plt.subplots(
2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]}
)
- ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", lw=0, c="black")
+ if len(np.where((~np.isnan(fit_fwhms)) & (~np.isnan(fit_dfwhms)))[0]) > 0:
+ ax1.errorbar(
+ fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", ls=" ", c="black"
+ )
- fwhm_slope_bins = np.arange(erange[0], erange[1], 10)
+ fwhm_slope_bins = np.arange(erange[0], erange[1], 10)
- qbb_line_vx = [2039.0, 2039.0]
- qbb_line_vy = [
- 0.9
- * np.nanmin(
- fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"])
- ),
- np.nanmax(
+ qbb_line_vx = [2039.0, 2039.0]
+ qbb_line_vy = [
+ 0.9
+ * np.nanmin(
+ fwhm_linear.func(
+ fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]
+ )
+ ),
+ np.nanmax(
+ [
+ ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"],
+ ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"],
+ ]
+ ),
+ ]
+ qbb_line_hx = [erange[0], 2039.0]
+
+ ax1.plot(
+ fwhm_slope_bins,
+ fwhm_linear.func(
+ fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]
+ ),
+ lw=1,
+ c="g",
+ label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV',
+ )
+ ax1.plot(
+ fwhm_slope_bins,
+ fwhm_quadratic.func(
+ fwhm_slope_bins, *ecal_class.fwhm_fit_quadratic["parameters"]
+ ),
+ lw=1,
+ c="b",
+ label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err_in_keV"]:1.2f} keV',
+ )
+ ax1.plot(
+ qbb_line_hx,
[
ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"],
+ ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"],
+ ],
+ lw=1,
+ c="r",
+ ls="--",
+ )
+ ax1.plot(
+ qbb_line_hx,
+ [
ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"],
- ]
- ),
- ]
- qbb_line_hx = [erange[0], 2039.0]
-
- ax1.plot(
- fwhm_slope_bins,
- fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]),
- lw=1,
- c="g",
- label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV',
- )
- ax1.plot(
- fwhm_slope_bins,
- fwhm_quadratic.func(
- fwhm_slope_bins, *ecal_class.fwhm_fit_quadratic["parameters"]
- ),
- lw=1,
- c="b",
- label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err_in_keV"]:1.2f} keV',
- )
- ax1.plot(
- qbb_line_hx,
- [
- ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"],
- ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"],
- ],
- lw=1,
- c="r",
- ls="--",
- )
- ax1.plot(
- qbb_line_hx,
- [
- ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"],
- ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"],
- ],
- lw=1,
- c="r",
- ls="--",
- )
- ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--")
+ ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"],
+ ],
+ lw=1,
+ c="r",
+ ls="--",
+ )
+ ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--")
- ax1.legend(loc="upper left", frameon=False)
- if np.isnan(ecal_class.fwhm_fit_linear["parameters"]).all():
- [
- 0.9 * np.nanmin(fit_fwhms),
- 1.1 * np.nanmax(fit_fwhms),
- ]
- else:
- ax1.set_ylim(
+ ax1.legend(loc="upper left", frameon=False)
+ if np.isnan(ecal_class.fwhm_fit_linear["parameters"]).all():
[
- 0.9
- * np.nanmin(
- fwhm_linear.func(
- fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]
- )
- ),
- 1.1
- * np.nanmax(
- fwhm_linear.func(
- fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]
- )
- ),
+ 0.9 * np.nanmin(fit_fwhms),
+ 1.1 * np.nanmax(fit_fwhms),
]
+ else:
+ ax1.set_ylim(
+ [
+ 0.9
+ * np.nanmin(
+ fwhm_linear.func(
+ fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]
+ )
+ ),
+ 1.1
+ * np.nanmax(
+ fwhm_linear.func(
+ fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]
+ )
+ ),
+ ]
+ )
+ ax1.set_xlim(erange)
+ ax1.set_ylabel("FWHM energy resolution (keV)")
+ ax2.plot(
+ fwhm_peaks,
+ (
+ fit_fwhms
+ - fwhm_linear.func(
+ fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"]
+ )
+ )
+ / fit_dfwhms,
+ lw=0,
+ marker="x",
+ c="g",
)
- ax1.set_xlim(erange)
- ax1.set_ylabel("FWHM energy resolution (keV)")
- ax2.plot(
- fwhm_peaks,
- (
- fit_fwhms
- - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"])
- )
- / fit_dfwhms,
- lw=0,
- marker="x",
- c="g",
- )
- ax2.plot(
- fwhm_peaks,
- (
- fit_fwhms
- - fwhm_quadratic.func(
- fwhm_peaks, *ecal_class.fwhm_fit_quadratic["parameters"]
+ ax2.plot(
+ fwhm_peaks,
+ (
+ fit_fwhms
+ - fwhm_quadratic.func(
+ fwhm_peaks, *ecal_class.fwhm_fit_quadratic["parameters"]
+ )
)
+ / fit_dfwhms,
+ lw=0,
+ marker="x",
+ c="b",
)
- / fit_dfwhms,
- lw=0,
- marker="x",
- c="b",
- )
- ax2.plot(erange, [0, 0], color="black", lw=0.5)
- ax2.set_xlabel("Energy (keV)")
- ax2.set_ylabel("Normalised Residuals")
+ ax2.plot(erange, [0, 0], color="black", lw=0.5)
+ ax2.set_xlabel("Energy (keV)")
+ ax2.set_ylabel("Normalised Residuals")
plt.tight_layout()
plt.close()
return fig
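
A sketch of how the new `high_stats_fitting` keywords and `update_calibration` might be used on an already-calibrated DataFrame `df` (e.g. as produced via `pygama.pargen.utils.load_data`). The column names and the selection string are placeholders, and the constructor is called with keywords to avoid guessing the positional order:

```python
from pygama.pargen.ecal_th import high_stats_fitting

# df: pandas.DataFrame of events containing the energy_param column
fitter = high_stats_fitting(
    energy_param="cuspEmax_ctc_cal",        # hypothetical input energy column
    selection_string="is_valid_cal&(~is_pulser)",  # hypothetical pandas query
    threshold=0,
    p_val=0,
    cal_energy_param="cuspEmax_ctc_recal",  # new: write the refit energy to its own column
    deg=2,                                  # new: degree of the energy-scale polynomial
    fixed={2: 0},  # new: fix a coefficient (np.polyfit ordering); None fixes at the guess
)

# fits the peaks, refits the energy scale and fills df[cal_energy_param]
fitter.update_calibration(df)
cal_dict = fitter.hit_dict  # calibration expression keyed by cal_energy_param
```
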
diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py
index 2418873bf..a9de8e6d3 100644
--- a/src/pygama/pargen/energy_cal.py
+++ b/src/pygama/pargen/energy_cal.py
@@ -15,9 +15,8 @@
from iminuit import Minuit, cost
from scipy.signal import find_peaks_cwt, medfilt
-import pygama.math.binned_fitting as pgbf
-import pygama.math.distributions as pgd
import pygama.math.histogram as pgh
+import pygama.math.peak_fitting as pgf
import pygama.math.utils as pgu
from pygama.pargen.utils import return_nans
@@ -160,7 +159,7 @@ def hpge_get_E_peaks(
imaxes = get_i_local_maxima(hist / np.sqrt(var), n_sigma)
# Keep maxes if they coincide with expected peaks
- test_peaks_keV = np.asarray([pgd.nb_poly(i, cal_pars) for i in bins[imaxes]])
+ test_peaks_keV = np.asarray([pgf.poly(i, cal_pars) for i in bins[imaxes]])
imatch = [abs(peaks_keV - i).min() < Etol_keV for i in test_peaks_keV]
got_peak_locations = bins[imaxes[imatch]]
@@ -230,7 +229,7 @@ def hpge_fit_E_peak_tops(
cov_list = []
for E_peak in peak_locs:
try:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist,
bins,
var,
@@ -260,12 +259,12 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess):
The function to be fit to the peak in the (windowed) hist
"""
if (
- func == pgd.gauss_on_step.get_cdf
- or func == pgd.gauss_on_step.get_pdf
- or func == pgd.gauss_on_step.pdf_ext
+ func == pgf.gauss_step_cdf
+ or func == pgf.gauss_step_pdf
+ or func == pgf.extended_gauss_step_pdf
):
# get mu and height from a gauss fit, also sigma as fallback
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist, bins, var, mode_guess=mode_guess, n_bins=10
)
bin_centres = pgh.get_bin_centers(bins)
@@ -321,7 +320,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess):
hstep = step / (bg + np.mean(hist[:10]))
- parguess = [bins[0], bins[-1], n_sig, mu, sigma / 2, n_bkg, hstep]
+ parguess = [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0]
for i, guess in enumerate(parguess):
if np.isnan(guess):
parguess[i] = 0
@@ -329,12 +328,12 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess):
return parguess
if (
- func == pgd.hpge_peak.get_cdf
- or func == pgd.hpge_peak.get_pdf
- or func == pgd.hpge_peak.pdf_ext
+ func == pgf.radford_cdf
+ or func == pgf.radford_pdf
+ or func == pgf.extended_radford_pdf
):
# guess mu, height
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
hist, bins, var, mode_guess=mode_guess, n_bins=10
)
bin_centres = pgh.get_bin_centers(bins)
@@ -396,7 +395,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess):
)
n_bkg = np.sum(hist) - n_sig
- parguess = [bins[0], bins[-1], n_sig, mu, sigma, htail, tau, n_bkg, hstep]
+ parguess = [n_sig, mu, sigma, htail, tau, n_bkg, hstep, bins[0], bins[-1], 0]
for i, guess in enumerate(parguess):
if np.isnan(guess):
@@ -415,21 +414,21 @@ def get_hpge_E_fixed(func):
"""
if (
- func == pgd.gauss_on_step.get_cdf
- or func == pgd.gauss_on_step.get_pdf
- or func == pgd.gauss_on_step.pdf_ext
+ func == pgf.gauss_step_cdf
+ or func == pgf.gauss_step_pdf
+ or func == pgf.extended_gauss_step_pdf
):
- # pars are: x_lo, x_hi, n_sig, mu, sigma, n_bkg, hstep
- return [0, 1], np.array([False, False, True, True, True, True, True])
+        # pars are: n_sig, mu, sigma, n_bkg, hstep, x_lo, x_hi, components
+ return [5, 6, 7], np.array([True, True, True, True, True, False, False, False])
if (
- func == pgd.hpge_peak.get_cdf
- or func == pgd.hpge_peak.get_pdf
- or func == pgd.hpge_peak.pdf_ext
+ func == pgf.radford_cdf
+ or func == pgf.radford_pdf
+ or func == pgf.extended_radford_pdf
):
- # pars are: x_lo, x_hi, n_sig, mu, sigma, htail, tau, n_bkg, hstep
- return [0, 1], np.array(
- [False, False, True, True, True, True, True, True, True]
+        # pars are: n_sig, mu, sigma, htail, tau, n_bkg, hstep, x_lo, x_hi, components
+ return [7, 8, 9], np.array(
+ [True, True, True, True, True, True, True, False, False, False]
)
else:
@@ -440,35 +439,37 @@ def get_hpge_E_fixed(func):
def get_hpge_E_bounds(func, parguess):
if (
- func == pgd.hpge_peak.get_cdf
- or func == pgd.hpge_peak.get_pdf
- or func == pgd.hpge_peak.pdf_ext
+ func == pgf.radford_cdf
+ or func == pgf.radford_pdf
+ or func == pgf.extended_radford_pdf
):
return [
- (None, None),
- (None, None),
(0, None),
- (parguess[0], parguess[1]),
+ (parguess[-3], parguess[-2]),
(0, None),
(0, 1),
(None, None),
(0, None),
(-1, 1),
+ (None, None),
+ (None, None),
+ (None, None),
]
elif (
- func == pgd.gauss_on_step.get_cdf
- or func == pgd.gauss_on_step.get_pdf
- or func == pgd.gauss_on_step.pdf_ext
+ func == pgf.gauss_step_cdf
+ or func == pgf.gauss_step_pdf
+ or func == pgf.extended_gauss_step_pdf
):
return [
- (None, None),
- (None, None),
(0, None),
- (parguess[0], parguess[1]),
+ (parguess[-3], parguess[-2]),
(0, None),
(0, None),
(-1, 1),
+ (None, None),
+ (None, None),
+ (None, None),
]
else:
@@ -503,6 +504,7 @@ def __call__(
hstep,
lower_range,
upper_range,
+ components,
):
return self.tail_weight * np.log(htail + 0.1) # len(self.data)/
@@ -514,7 +516,7 @@ def staged_fit(
bounds = get_hpge_E_bounds(func_i, par_guesses)
fixed, mask = get_hpge_E_fixed(func_i)
- if func_i == pgd.hpge_peak.pdf_ext or func_i == pgd.hpge_peak.get_pdf:
+ if func_i == pgf.extended_radford_pdf or func_i == pgf.radford_pdf:
cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) + tail_prior(
energies, func_i, tail_weight=tail_weight
)
@@ -533,8 +535,8 @@ def staged_fit(
m.migrad()
try:
# set htail to guess
- m.values["htail"] = par_guesses[5]
- m.values["tau"] = par_guesses[6]
+ m.values["htail"] = par_guesses[3]
+ m.values["tau"] = par_guesses[4]
m.fixed = False
for fix in fixed:
m.fixed[fix] = True
@@ -551,8 +553,8 @@ def staged_fit(
if valid_fit == False:
raise RuntimeError
except:
- func_i = pgd.gauss_on_step.pdf_ext
- gof_func_i = pgd.gauss_on_step.get_pdf
+ func_i = pgf.extended_gauss_step_pdf
+ gof_func_i = pgf.gauss_step_pdf
pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(
energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess
)
@@ -563,8 +565,8 @@ def staged_fit(
or m.values["htail"] < 2 * m.errors["htail"]
or np.isnan(m.values).any()
): # switch to stat test
- func_i = pgd.gauss_on_step.pdf_ext
- gof_func_i = pgd.gauss_on_step.get_pdf
+ func_i = pgf.extended_gauss_step_pdf
+ gof_func_i = pgf.gauss_step_pdf
pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(
energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess
)
@@ -596,7 +598,7 @@ def hpge_fit_E_peaks(
mode_guesses,
wwidths,
n_bins=50,
- funcs=pgd.gauss_on_step.get_cdf,
+ funcs=pgf.gauss_step_cdf,
method="unbinned",
gof_funcs=None,
n_events=None,
@@ -679,7 +681,7 @@ def hpge_fit_E_peaks(
hist, bins, var = pgh.get_hist(
energies, bins=n_bins_i, range=(Euc_min, Euc_max)
)
- if func_i == pgd.hpge_peak.pdf_ext or pgd.gauss_on_step.pdf_ext:
+        if func_i == pgf.extended_radford_pdf or func_i == pgf.extended_gauss_step_pdf:
(
pars_i,
errs_i,
@@ -699,7 +701,7 @@ def hpge_fit_E_peaks(
mode_guess,
tail_weight=tail_weight,
)
- if pars_i["n_sig"] < 20:
+ if pars_i["n_sig"] < 100:
valid_fit = False
else:
par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i)
@@ -722,7 +724,7 @@ def hpge_fit_E_peaks(
cov_i = m.covariance
valid_fit = m.valid
- csqr = pgbf.goodness_of_fit(
+ csqr = pgf.goodness_of_fit(
hist,
bins,
None,
@@ -739,7 +741,7 @@ def hpge_fit_E_peaks(
par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i)
bounds = get_hpge_E_bounds(func_i, par_guesses)
fixed, mask = get_hpge_E_fixed(func_i)
- pars_i, errs_i, cov_i = pgbf.fit_binned(
+ pars_i, errs_i, cov_i = pgf.fit_binned(
func_i,
hist,
bins,
@@ -753,7 +755,7 @@ def hpge_fit_E_peaks(
)
valid_fit = True
- csqr = pgbf.goodness_of_fit(
+ csqr = pgf.goodness_of_fit(
hist,
bins,
None,
@@ -771,7 +773,7 @@ def hpge_fit_E_peaks(
p_val = scipy.stats.chi2.sf(csqr[0], csqr[1] + len(np.where(mask)[0]))
- total_events = func_i.get_total_events(pars_i, errors=errs_i)
+ total_events = pgf.get_total_events_func(func_i, pars_i, errors=errs_i)
if (
sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask])
== np.inf
@@ -837,10 +839,10 @@ def hpge_fit_E_peaks(
def poly_wrapper(x, *pars):
- return pgd.nb_poly(x, pars)
+ return pgf.poly(x, pars)
-def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0):
+def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0, fixed=None):
"""Find best fit of poly(E) = mus +/- sqrt(mu_vars)
Compare to hpge_fit_E_cal_func which fits for E = poly(mu)
@@ -855,7 +857,9 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0):
deg : int
degree for energy scale fit. deg=0 corresponds to a simple scaling
mu = scale * E. Otherwise deg follows the definition in np.polyfit
-
+    fixed : dict
+        dict whose keys are the indices of the polyfit parameters to fix and
+        whose values are the values to fix them at (None fixes at the guess value)
Returns
-------
pars : array
@@ -871,7 +875,16 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0):
else:
poly_pars = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars))
c = cost.LeastSquares(Es_keV, mus, np.sqrt(mu_vars), poly_wrapper)
+ if fixed is not None:
+ for idx, val in fixed.items():
+ if val is True or val is None:
+ pass
+ else:
+ poly_pars[idx] = val
m = Minuit(c, *poly_pars)
+ if fixed is not None:
+ for idx in list(fixed):
+ m.fixed[idx] = True
m.simplex()
m.migrad()
m.hesse()
@@ -881,7 +894,7 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0):
return pars, errs, cov
-def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0):
+def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0, fixed=None):
"""Find best fit of E = poly(mus +/- sqrt(mu_vars))
This is an inversion of hpge_fit_E_scale.
E uncertainties are computed from mu_vars / dmu/dE where mu = poly(E) is the
@@ -901,6 +914,9 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0):
deg : int
degree for energy scale fit. deg=0 corresponds to a simple scaling
mu = scale * E. Otherwise deg follows the definition in np.polyfit
+    fixed : dict
+        dict whose keys are the indices of the polyfit parameters to fix and
+        whose values are the values to fix them at (None fixes at the guess value)
Returns
-------
@@ -921,8 +937,17 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0):
dmudEs += E_scale_pars[n] * mus ** (len(E_scale_pars) - 2 - n)
E_weights = dmudEs * mu_vars
poly_pars = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights)
+ if fixed is not None:
+ for idx, val in fixed.items():
+ if val is True or val is None:
+ pass
+ else:
+ poly_pars[idx] = val
c = cost.LeastSquares(mus, Es_keV, E_weights, poly_wrapper)
m = Minuit(c, *poly_pars)
+ if fixed is not None:
+ for idx in list(fixed):
+ m.fixed[idx] = True
m.simplex()
m.migrad()
m.hesse()
@@ -939,7 +964,7 @@ def hpge_E_calibration(
deg=0,
uncal_is_int=False,
range_keV=None,
- funcs=pgd.gauss_on_step.get_cdf,
+ funcs=pgf.gauss_step_cdf,
gof_funcs=None,
method="unbinned",
gof_func=None,
@@ -1115,18 +1140,18 @@ def hpge_E_calibration(
n_bins = 50
elif np.isscalar(range_keV):
derco = np.polyder(np.poly1d(roughpars)).coefficients
- der = [pgd.nb_poly(Ei, derco) for Ei in got_peaks_keV]
+ der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV]
range_uncal = [float(range_keV) / d for d in der]
n_bins = [int(range_keV / 0.5 / d) for d in der]
elif isinstance(range_keV, tuple):
rangeleft_keV, rangeright_keV = range_keV
derco = np.polyder(np.poly1d(roughpars)).coefficients
- der = [pgd.nb_poly(Ei, derco) for Ei in got_peaks_keV]
+ der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV]
range_uncal = [(rangeleft_keV / d, rangeright_keV / d) for d in der]
n_bins = [int(sum(range_keV) / 0.5 / d) for d in der]
elif isinstance(range_keV, list):
derco = np.polyder(np.poly1d(roughpars)).coefficients
- der = [pgd.nb_poly(Ei, derco) for Ei in got_peaks_keV]
+ der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV]
range_uncal = [
(r[0] / d, r[1] / d) if isinstance(r, tuple) else r / d
for r, d in zip(range_keV, der)
@@ -1197,7 +1222,7 @@ def hpge_E_calibration(
# Do a second calibration to the results of the full peak fits
mus = [
- func_i.get_mu(pars_i, errors=errors_i)
+ pgf.get_mu_func(func_i, pars_i, errors=errors_i)
for func_i, pars_i, errors_i in zip(pk_funcs, pk_pars, pk_errors)
]
mus, mu_vars = zip(*mus)
@@ -1218,15 +1243,14 @@ def hpge_E_calibration(
# Finally, calculate fwhms in keV
uncal_fwhms = [
- func_i.get_fwhm(pars_i, cov=covs_i)
+ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i)
for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs)
]
-
uncal_fwhms, uncal_fwhm_errs = zip(*uncal_fwhms)
uncal_fwhms = np.asarray(uncal_fwhms)
uncal_fwhm_errs = np.asarray(uncal_fwhm_errs)
derco = np.polyder(np.poly1d(pars)).coefficients
- der = [pgd.nb_poly(Ei, derco) for Ei in fitted_peaks_keV]
+ der = [pgf.poly(Ei, derco) for Ei in fitted_peaks_keV]
cal_fwhms = uncal_fwhms * der
cal_fwhms_errs = uncal_fwhm_errs * der
@@ -1332,7 +1356,7 @@ def poly_match(xx, yy, deg=-1, rtol=1e-5, atol=1e-8):
pars_i = np.polyfit(xx_i, yy_i, deg)
polxx = np.zeros(len(yy_i))
xxn = np.ones(len(yy_i))
- polxx = pgd.nb_poly(xx_i, pars_i)
+ polxx = pgf.poly(xx_i, pars_i)
# by here we have the best polxx. Search for matches and store pars_i if
# its the best so far
@@ -1660,37 +1684,25 @@ def calibrate_tl208(energy_series, cal_peaks=None, plotFigure=None):
# if inp == "q": exit()
bounds = (
+ [0.9 * guess_e, 0.5 * guess_sigma, 0, 0, 0, 0, 0],
[
- bin_centers[0],
- bin_centers[-1],
- 0,
- 0.9 * guess_e,
- 0.5 * guess_sigma,
- 0,
- 0,
- 0,
- 0,
- ],
- [
- bin_centers[0],
- bin_centers[-1],
- 5 * guess_area,
1.1 * guess_e,
2 * guess_sigma,
+ 0.1,
0.75,
window_width_in_adc,
10,
- 0.1,
+ 5 * guess_area,
],
)
params = fit_binned(
- hpge_peak.get_pdf,
+ radford_peak,
peak_hist,
bin_centers,
- [guess_area, guess_e, guess_sigma, 0.7, 5, 0, 1e-3],
+ [guess_e, guess_sigma, 1e-3, 0.7, 5, 0, guess_area],
) # bounds=bounds)
- plt.plot(bin_centers, hpge_peak.get_pdf(bin_centers, *params), color="r")
+ plt.plot(bin_centers, radford_peak(bin_centers, *params), color="r")
# inp = input("q to quit...")
# if inp == "q": exit()
@@ -1722,7 +1734,7 @@ def calibrate_tl208(energy_series, cal_peaks=None, plotFigure=None):
ls="steps-mid",
color="k",
)
- fit = hpge_peak.get_pdf(bin_centers, *params)
+ fit = radford_peak(bin_centers, *params)
ax_peak.plot(
bin_centers * rough_kev_per_adc + rough_kev_offset, fit, color="b"
)
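
A small worked example of the new `fixed` keyword on `hpge_fit_E_scale` / `hpge_fit_E_cal_func` (toy numbers; the dict keys follow the np.polyfit coefficient ordering):

```python
import numpy as np

import pygama.pargen.energy_cal as cal

# toy calibration points: peak positions in ADC, their variances, true energies in keV
Es_keV = np.array([583.2, 727.3, 860.6, 1592.5, 2103.5, 2614.5])
mus = 40.25 * Es_keV + np.array([0.3, -0.2, 0.1, -0.4, 0.2, 0.0])
mu_vars = np.full_like(mus, 0.5**2)

# fit mu = poly(E) with a first-degree polynomial, pinning the intercept (index 1) to 0
pars, errs, cov = cal.hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=1, fixed={1: 0})

# invert to get the calibration function E = poly(mu), keeping the same constraint
cal_pars, cal_errs, cal_cov = cal.hpge_fit_E_cal_func(
    mus, mu_vars, Es_keV, pars, deg=1, fixed={1: 0}
)
```
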
diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py
index 0af80684f..905d126f0 100644
--- a/src/pygama/pargen/energy_optimisation.py
+++ b/src/pygama/pargen/energy_optimisation.py
@@ -13,7 +13,7 @@
import sys
from collections import namedtuple
-import lgdo.lh5_store as lh5
+import lgdo.lh5 as lh5
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
@@ -23,13 +23,13 @@
from matplotlib.colors import LogNorm
from scipy.optimize import curve_fit, minimize
from scipy.stats import chisquare, norm
+from sklearn.exceptions import ConvergenceWarning
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
+from sklearn.utils._testing import ignore_warnings
-import pygama.math.binned_fitting as pgbf
-import pygama.math.distributions as pgd
import pygama.math.histogram as pgh
-import pygama.math.hpge_peak_fitting as pghpf
+import pygama.math.peak_fitting as pgf
import pygama.pargen.cuts as cts
import pygama.pargen.dsp_optimize as opt
import pygama.pargen.energy_cal as pgc
@@ -70,8 +70,8 @@ def run_optimisation(
Number of events to run over
"""
grid = set_par_space(opt_config)
- waveforms = sto.read_object(f"/raw/{wf_field}", file, idx=cuts, n_rows=n_events)[0]
- baseline = sto.read_object("/raw/baseline", file, idx=cuts, n_rows=n_events)[0]
+ waveforms = sto.read(f"/raw/{wf_field}", file, idx=cuts, n_rows=n_events)[0]
+ baseline = sto.read("/raw/baseline", file, idx=cuts, n_rows=n_events)[0]
tb_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline})
return opt.run_grid(tb_data, dsp_config, grid, fom, db_dict, **fom_kwargs)
@@ -140,12 +140,8 @@ def form_dict(in_dict, length):
fom_kwargs = fom_kwargs["fom_kwargs"]
fom_kwargs = form_dict(fom_kwargs, len(grid))
sto = lh5.LH5Store()
- waveforms = sto.read_object(
- f"{lh5_path}/{wf_field}", file, idx=cuts, n_rows=n_events
- )[0]
- baseline = sto.read_object(f"{lh5_path}/baseline", file, idx=cuts, n_rows=n_events)[
- 0
- ]
+ waveforms = sto.read(f"{lh5_path}/{wf_field}", file, idx=cuts, n_rows=n_events)[0]
+ baseline = sto.read(f"{lh5_path}/baseline", file, idx=cuts, n_rows=n_events)[0]
tb_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline})
return opt.run_grid_multiprocess_parallel(
tb_data,
@@ -189,7 +185,7 @@ def simple_guess(hist, bins, var, func_i, fit_range):
"""
Simple guess for peak fitting
"""
- if func_i == pgd.hpge_peak.pdf_ext:
+ if func_i == pgf.extended_radford_pdf:
bin_cs = (bins[1:] + bins[:-1]) / 2
_, sigma, amp = pgh.get_gaussian_guess(hist, bins)
i_0 = np.nanargmax(hist)
@@ -206,8 +202,6 @@ def simple_guess(hist, bins, var, func_i, fit_range):
nsig_guess = np.sum(hist[i_0 - n_bins_range : i_0 + n_bins_range])
nbkg_guess = np.sum(hist) - nsig_guess
parguess = [
- fit_range[0],
- fit_range[1],
nsig_guess,
mu,
sigma,
@@ -215,10 +209,13 @@ def simple_guess(hist, bins, var, func_i, fit_range):
tau,
nbkg_guess,
hstep,
- ]
+ fit_range[0],
+ fit_range[1],
+ 0,
+ ] #
return parguess
- elif func_i == pgd.gauss_on_step.pdf_ext:
+ elif func_i == pgf.extended_gauss_step_pdf:
mu, sigma, amp = pgh.get_gaussian_guess(hist, bins)
i_0 = np.argmax(hist)
bg = np.mean(hist[-10:])
@@ -228,7 +225,7 @@ def simple_guess(hist, bins, var, func_i, fit_range):
n_bins_range = int((4 * sigma) // dx)
nsig_guess = np.sum(hist[i_0 - n_bins_range : i_0 + n_bins_range])
nbkg_guess = np.sum(hist) - nsig_guess
- return [fit_range[0], fit_range[1], nsig_guess, mu, sigma, nbkg_guess, hstep]
+ return [nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0]
def unbinned_energy_fit(
@@ -255,24 +252,24 @@ def unbinned_energy_fit(
)
bin_cs1 = (bins[:-1] + bins[1:]) / 2
if guess is not None:
- x0 = [*guess[:-2], fit_range[0], fit_range[1]]
+ x0 = [*guess[:-2], fit_range[0], fit_range[1], False]
else:
- if func == pgd.hpge_peak.pdf_ext:
- x0 = simple_guess(hist1, bins, var, pgd.gauss_on_step.pdf_ext, fit_range)
+ if func == pgf.extended_radford_pdf:
+ x0 = simple_guess(hist1, bins, var, pgf.extended_gauss_step_pdf, fit_range)
if verbose:
print(x0)
- c = cost.ExtendedUnbinnedNLL(energy, pgd.gauss_on_step.pdf_ext)
+ c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_gauss_step_pdf)
m = Minuit(c, *x0)
- m.fixed[:2] = True
+ m.fixed[-3:] = True
m.simplex().migrad()
m.hesse()
if guess is not None:
- x0_rad = [fit_range[0], fit_range[1], *guess[2:]]
+ x0_rad = [*guess[:-2], fit_range[0], fit_range[1], False]
else:
x0_rad = simple_guess(hist1, bins, var, func, fit_range)
- x0 = m.values[:5]
- x0 += x0_rad[5:7]
- x0 += m.values[5:]
+ x0 = m.values[:3]
+ x0 += x0_rad[3:5]
+ x0 += m.values[3:]
else:
x0 = simple_guess(hist1, bins, var, func, fit_range)
if verbose:
@@ -281,7 +278,7 @@ def unbinned_energy_fit(
m = Minuit(c, *x0)
if tol is not None:
m.tol = tol
- m.fixed[:2] = True
+ m.fixed[-3:] = True
m.migrad()
m.hesse()
@@ -293,17 +290,17 @@ def unbinned_energy_fit(
m.valid
# & m.accurate
& (~np.isnan(m.errors).any())
- & (~(np.array(m.errors[2:]) == 0).all())
+ & (~(np.array(m.errors[:-3]) == 0).all())
)
- cs = pgbf.goodness_of_fit(
- hist, bins, None, gof_func, m.values[2:], method="Pearson"
+ cs = pgf.goodness_of_fit(
+ hist, bins, None, gof_func, m.values[:-3], method="Pearson"
)
cs = cs[0] / cs[1]
m2 = Minuit(c, *x0)
if tol is not None:
m2.tol = tol
- m2.fixed[:2] = True
+ m2.fixed[-3:] = True
m2.simplex().migrad()
m2.hesse()
m2_fit = func(bin_cs1, *m2.values)[1]
@@ -311,16 +308,16 @@ def unbinned_energy_fit(
m2.valid
# & m2.accurate
& (~np.isnan(m.errors).any())
- & (~(np.array(m2.errors[2:]) == 0).all())
+ & (~(np.array(m2.errors[:-3]) == 0).all())
)
- cs2 = pgbf.goodness_of_fit(
- hist, bins, None, gof_func, m2.values[2:], method="Pearson"
+ cs2 = pgf.goodness_of_fit(
+ hist, bins, None, gof_func, m2.values[:-3], method="Pearson"
)
cs2 = cs2[0] / cs2[1]
- frac_errors1 = np.sum(np.abs(np.array(m.errors)[2:] / np.array(m.values)[2:]))
- frac_errors2 = np.sum(np.abs(np.array(m2.errors)[2:] / np.array(m2.values)[2:]))
+ frac_errors1 = np.sum(np.abs(np.array(m.errors)[:-3] / np.array(m.values)[:-3]))
+ frac_errors2 = np.sum(np.abs(np.array(m2.errors)[:-3] / np.array(m2.values)[:-3]))
if verbose:
print(m)
@@ -343,21 +340,21 @@ def unbinned_energy_fit(
m = Minuit(c, *x0)
if tol is not None:
m.tol = tol
- m.fixed[:2] = True
+ m.fixed[-3:] = True
m.limits = pgc.get_hpge_E_bounds(func)
m.simplex().simplex().migrad()
m.hesse()
if verbose:
print(m)
- cs = pgbf.goodness_of_fit(
- hist, bins, None, gof_func, m.values[:-2], method="Pearson"
+ cs = pgf.goodness_of_fit(
+ hist, bins, None, gof_func, m.values[:-3], method="Pearson"
)
cs = cs[0] / cs[1]
valid3 = (
m.valid
# & m.accurate
& (~np.isnan(m.errors).any())
- & (~(np.array(m.errors[2:]) == 0).all())
+ & (~(np.array(m.errors[:-3]) == 0).all())
)
if valid3 is False:
try:
@@ -365,7 +362,7 @@ def unbinned_energy_fit(
valid3 = (
m.valid
& (~np.isnan(m.errors).any())
- & (~(np.array(m.errors[2:]) == 0).all())
+ & (~(np.array(m.errors[:-3]) == 0).all())
)
except:
raise RuntimeError
@@ -377,25 +374,25 @@ def unbinned_energy_fit(
elif valid2 == False or cs * 1.05 < cs2:
pars = np.array(m.values)[:-1]
- errs = np.array(m.errors)[:-2]
+ errs = np.array(m.errors)[:-3]
cov = np.array(m.covariance)[:-1, :-1]
csqr = cs
elif valid1 == False or cs2 * 1.05 < cs:
pars = np.array(m2.values)[:-1]
- errs = np.array(m2.errors)[:-2]
+ errs = np.array(m2.errors)[:-3]
cov = np.array(m2.covariance)[:-1, :-1]
csqr = cs2
elif frac_errors1 < frac_errors2:
pars = np.array(m.values)[:-1]
- errs = np.array(m.errors)[:-2]
+ errs = np.array(m.errors)[:-3]
cov = np.array(m.covariance)[:-1, :-1]
csqr = cs
elif frac_errors1 > frac_errors2:
pars = np.array(m2.values)[:-1]
- errs = np.array(m2.errors)[:-2]
+ errs = np.array(m2.errors)[:-3]
cov = np.array(m2.covariance)[:-1, :-1]
csqr = cs2
@@ -482,16 +479,14 @@ def get_peak_fwhm_with_dt_corr(
guess=guess,
tol=tol,
)
- if func == pgd.hpge_peak.pdf_ext:
+ if func == pgf.extended_radford_pdf:
if energy_pars[3] < 1e-6 and energy_err[3] < 1e-6:
fwhm = energy_pars[2] * 2 * np.sqrt(2 * np.log(2))
fwhm_err = np.sqrt(cov[2][2]) * 2 * np.sqrt(2 * np.log(2))
else:
- fwhm = pghpf.hpge_peak_fwhm(
- energy_pars[2], energy_pars[3], energy_pars[4]
- )
+ fwhm = pgf.radford_fwhm(energy_pars[2], energy_pars[3], energy_pars[4])
- elif func == pgd.gauss_on_step.pdf_ext:
+ elif func == pgf.extended_gauss_step_pdf:
fwhm = energy_pars[2] * 2 * np.sqrt(2 * np.log(2))
fwhm_err = np.sqrt(cov[2][2]) * 2 * np.sqrt(2 * np.log(2))
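The factor 2 * np.sqrt(2 * np.log(2)) used above is the standard Gaussian sigma-to-FWHM conversion, FWHM = 2 sqrt(2 ln 2) sigma, roughly 2.355 sigma (the half maximum of exp(-x^2 / 2 sigma^2) is reached at x = sigma sqrt(2 ln 2)). A quick numpy check, purely illustrative:

    import numpy as np

    sigma = 1.0
    fwhm = 2 * np.sqrt(2 * np.log(2)) * sigma
    print(round(fwhm, 4))  # 2.3548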
@@ -509,18 +504,18 @@ def get_peak_fwhm_with_dt_corr(
yerr_boot = np.nanstd(y_max, axis=0)
- if func == pgd.hpge_peak.pdf_ext and not (
+ if func == pgf.extended_radford_pdf and not (
energy_pars[3] < 1e-6 and energy_err[3] < 1e-6
):
y_b = np.zeros(len(par_b))
for i, p in enumerate(par_b):
try:
- y_b[i] = pghpf.hpge_peak_fwhm(p[2], p[3], p[4]) #
+ y_b[i] = pgf.radford_fwhm(p[2], p[3], p[4]) #
except:
y_b[i] = np.nan
fwhm_err = np.nanstd(y_b, axis=0)
if fwhm_err == 0:
- fwhm, fwhm_err = pghpf.hpge_peak_fwhm(
+ fwhm, fwhm_err = pgf.radford_fwhm(
energy_pars[2],
energy_pars[3],
energy_pars[4],
@@ -897,15 +892,15 @@ def get_wf_indexes(sorted_indexs, n_events):
return out_list
-def index_data(data, indexes):
+def index_data(data, indexes, wf_field="waveform"):
new_baselines = lh5.Array(data["baseline"].nda[indexes])
- new_waveform_values = data["waveform"]["values"].nda[indexes]
- new_waveform_dts = data["waveform"]["dt"].nda[indexes]
- new_waveform_t0 = data["waveform"]["t0"].nda[indexes]
+ new_waveform_values = data[wf_field]["values"].nda[indexes]
+ new_waveform_dts = data[wf_field]["dt"].nda[indexes]
+ new_waveform_t0 = data[wf_field]["t0"].nda[indexes]
new_waveform = lh5.WaveformTable(
None, new_waveform_t0, "ns", new_waveform_dts, "ns", new_waveform_values
)
- new_data = lh5.Table(col_dict={"waveform": new_waveform, "baseline": new_baselines})
+ new_data = lh5.Table(col_dict={wf_field: new_waveform, "baseline": new_baselines})
return new_data
@@ -929,8 +924,9 @@ def event_selection(
if not isinstance(kev_widths, list):
kev_widths = [kev_widths]
- sto = lh5.LH5Store()
- df = lh5.load_dfs(raw_files, ["daqenergy", "timestamp"], lh5_path)
+ df = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "timestamp"])[
+ 0
+ ].view_as("pd")
if pulser_mask is None:
pulser_props = cts.find_pulser_properties(df, energy="daqenergy")
@@ -1000,13 +996,7 @@ def event_selection(
idx_list = get_wf_indexes(sort_index, idx_list_lens)
idxs = np.array(sorted(np.concatenate(masks)))
- waveforms = sto.read_object(
- f"{lh5_path}/{wf_field}", raw_files, idx=idxs, n_rows=len(idxs)
- )[0]
- baseline = sto.read_object(
- f"{lh5_path}/baseline", raw_files, idx=idxs, n_rows=len(idxs)
- )[0]
- input_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline})
+ input_data = sto.read(f"{lh5_path}", raw_files, idx=idxs, n_rows=len(idxs))[0]
if isinstance(dsp_config, str):
with open(dsp_config) as r:
@@ -1025,6 +1015,7 @@ def event_selection(
ct_mask = cts.get_cut_indexes(tb_data, cut_dict)
final_events = []
+ out_events = []
for peak_idx in peak_idxs:
peak = peaks_keV[peak_idx]
kev_width = kev_widths[peak_idx]
@@ -1073,18 +1064,16 @@ def event_selection(
log.info(f"lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}")
final_mask = (energy > e_lower_lim) & (energy < e_upper_lim)
final_events.append(peak_ids[final_mask][:n_events])
+ out_events.append(idxs[final_events[-1]])
log.info(f"{len(peak_ids[final_mask][:n_events])} passed selections for {peak}")
if len(peak_ids[final_mask]) < 0.5 * n_events:
log.warning("Less than half number of specified events found")
elif len(peak_ids[final_mask]) < 0.1 * n_events:
log.error("Less than 10% number of specified events found")
-
+ out_events = np.unique(np.concatenate(out_events))
sort_index = np.argsort(np.concatenate(final_events))
idx_list = get_wf_indexes(sort_index, [len(mask) for mask in final_events])
- idxs = np.array(sorted(np.concatenate(final_events)))
-
- final_data = index_data(input_data, idxs)
- return final_data, idx_list
+ return out_events, idx_list
def fwhm_slope(x, m0, m1, m2):
@@ -1395,6 +1384,7 @@ def get_first_point(self):
self.optimal_ei = None
return self.optimal_x, self.optimal_ei
+ @ignore_warnings(category=ConvergenceWarning)
def iterate_values(self):
nan_idxs = np.isnan(self.y_init)
self.gauss_pr.fit(self.x_init[~nan_idxs], np.array(self.y_init)[~nan_idxs])
@@ -1465,6 +1455,7 @@ def get_best_vals(self):
out_dict[name][parameter] = value_str
return out_dict
+ @ignore_warnings(category=ConvergenceWarning)
def plot(self, init_samples=None):
nan_idxs = np.isnan(self.y_init)
fail_idxs = np.isnan(self.yerr_init)
@@ -1571,6 +1562,7 @@ def plot(self, init_samples=None):
plt.close()
return fig
+ @ignore_warnings(category=ConvergenceWarning)
def plot_acq(self, init_samples=None):
nan_idxs = np.isnan(self.y_init)
self.gauss_pr.fit(self.x_init[~nan_idxs], np.array(self.y_init)[~nan_idxs])
diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py
index a265fdb13..61e833994 100644
--- a/src/pygama/pargen/extract_tau.py
+++ b/src/pygama/pargen/extract_tau.py
@@ -15,17 +15,18 @@
mpl.use("agg")
import lgdo
-import lgdo.lh5_store as lh5
+import lgdo.lh5 as lh5
import matplotlib.pyplot as plt
import numpy as np
-import pygama.math.binned_fitting as pgbf
import pygama.math.histogram as pgh
+import pygama.math.peak_fitting as pgf
import pygama.pargen.cuts as cts
import pygama.pargen.dsp_optimize as opt
import pygama.pargen.energy_optimisation as om
log = logging.getLogger(__name__)
+sto = lh5.LH5Store()
def load_data(
@@ -36,8 +37,9 @@ def load_data(
threshold: int = 5000,
wf_field: str = "waveform",
) -> lgdo.Table:
- sto = lh5.LH5Store()
- df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], lh5_path)
+ df = sto.read(lh5_path, raw_file, field_mask=["daqenergy", "timestamp"])[0].view_as(
+ "pd"
+ )
if pulser_mask is None:
pulser_props = cts.find_pulser_properties(df, energy="daqenergy")
@@ -61,12 +63,10 @@ def load_data(
cuts = np.where((df.daqenergy.values > threshold) & (~ids))[0]
- waveforms = sto.read_object(
- f"{lh5_path}/{wf_field}", raw_file, idx=cuts, n_rows=n_events
- )[0]
- baseline = sto.read_object(
- f"{lh5_path}/baseline", raw_file, idx=cuts, n_rows=n_events
- )[0]
+ waveforms = sto.read(f"{lh5_path}/{wf_field}", raw_file, idx=cuts, n_rows=n_events)[
+ 0
+ ]
+ baseline = sto.read(f"{lh5_path}/baseline", raw_file, idx=cuts, n_rows=n_events)[0]
tb_data = lh5.Table(col_dict={f"{wf_field}": waveforms, "baseline": baseline})
return tb_data
@@ -99,7 +99,7 @@ def get_decay_constant(
bin_centres = pgh.get_bin_centers(bins)
high_bin = bin_centres[np.argmax(counts)]
try:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
counts,
bins,
n_bins=10,
@@ -144,8 +144,8 @@ def get_decay_constant(
)
axins.axvline(high_bin, color="red")
axins.set_xlim(bins[in_min], bins[in_max])
- labels = ax.get_xticklabels()
- ax.set_xticklabels(labels=labels, rotation=45)
+ ax.set_xticks(ax.get_xticks())
+ ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45)
out_plot_dict["slope"] = fig
if display > 1:
plt.show()
@@ -162,7 +162,7 @@ def fom_dpz(tb_data, verbosity=0, rand_arg=None):
max_idx = np.argmax(counts)
mu = start_bins[max_idx]
try:
- pars, cov = pgbf.gauss_mode_width_max(
+ pars, cov = pgf.gauss_mode_width_max(
counts,
start_bins,
mode_guess=mu,
diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py
new file mode 100644
index 000000000..aa3c15806
--- /dev/null
+++ b/src/pygama/pargen/lq_cal.py
@@ -0,0 +1,949 @@
+from __future__ import annotations
+
+import json
+import logging
+import os
+import pathlib
+import re
+from datetime import datetime
+from typing import Callable
+
+import matplotlib as mpl
+
+mpl.use("agg")
+
+import lgdo.lh5_store as lh5
+import matplotlib.cm as cmx
+import matplotlib.colors as mcolors
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from iminuit import Minuit, cost, util
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib.colors import LogNorm
+from scipy.stats import linregress
+
+import pygama.math.histogram as pgh
+import pygama.math.peak_fitting as pgf
+import pygama.pargen.AoE_cal as aoe
+from pygama.pargen.utils import *
+
+log = logging.getLogger(__name__)
+
+
+def get_fit_range(lq: np.ndarray) -> tuple[float, float]:
+ """
+    Function for determining the fit range for a given distribution of LQ values
+ """
+
+ # Get an initial guess of mu and sigma, use these values to determine our final fit range
+ left_guess = np.nanpercentile(lq, 1)
+ right_guess = np.nanpercentile(lq, 95)
+ test_range = (left_guess, right_guess)
+
+ hist, bins, _ = pgh.get_hist(lq, bins=100, range=test_range)
+ bin_centers = (bins[:-1] + bins[1:]) / 2
+ mu = bin_centers[np.argmax(hist)]
+ _, sigma, _ = pgh.get_gaussian_guess(hist, bins)
+
+ left_edge = mu - 2.5 * sigma
+ right_edge = mu + 2.5 * sigma
+ fit_range = (left_edge, right_edge)
+
+ return fit_range
+
+
+def get_lq_hist(
+    df: pd.DataFrame,
+ lq_param: str,
+ cal_energy_param: str,
+ peak: float,
+ sidebands: bool = True,
+):
+ """
+    Function for getting a distribution of LQ values for a given peak. Returns a histogram of the
+    LQ distribution, the array of bin edges, and a per-bin uncertainty estimate
+ """
+
+ if sidebands:
+ # Get a histogram of events in the peak using sideband subtraction
+ # Uses a 6 keV window, and the sideband is to the right of the peak
+ # Default option
+
+ peak_window = (df[cal_energy_param] < peak + 3) & (
+ df[cal_energy_param] > peak - 3
+ )
+ sideband_window = (df[cal_energy_param] < peak + 18) & (
+ df[cal_energy_param] > peak + 12
+ )
+
+ fit_range = get_fit_range(df[lq_param][peak_window])
+
+ sideband_hist, bins, _ = pgh.get_hist(
+ df[lq_param][sideband_window], bins=100, range=fit_range
+ )
+ dep_hist, _, _ = pgh.get_hist(
+ df[lq_param][peak_window], bins=100, range=fit_range
+ )
+ final_hist = dep_hist - sideband_hist
+ var = np.sqrt(np.add(sideband_hist, dep_hist))
+
+ return final_hist, bins, var
+
+ else:
+ # Return a histogram in a 5 keV range surrounding the specified peak
+ # Only use if peak statistics are low
+
+ peak_window = (df[cal_energy_param] < peak + 2.5) & (
+ df[cal_energy_param] > peak - 2.5
+ )
+ fit_range = get_fit_range(df[lq_param][peak_window])
+ dep_hist, bins, var = pgh.get_hist(
+ df[lq_param][peak_window], bins=100, range=fit_range
+ )
+
+ return dep_hist, bins, var
+
+
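The sideband branch above amounts to histogramming the +/- 3 keV peak window and the +12 to +18 keV sideband with identical binning and subtracting the two. A minimal standalone sketch of that pattern on synthetic data, with hypothetical array names, might look like:

    import numpy as np

    rng = np.random.default_rng(0)
    e_cal = rng.uniform(1580, 1620, 100_000)  # hypothetical calibrated energies (keV)
    lq = rng.normal(0.0, 0.3, 100_000)        # hypothetical LQ values

    peak = 1592.5
    in_peak = (e_cal > peak - 3) & (e_cal < peak + 3)
    in_sideband = (e_cal > peak + 12) & (e_cal < peak + 18)

    bins = np.linspace(-1.0, 1.0, 101)
    peak_hist, _ = np.histogram(lq[in_peak], bins=bins)
    sb_hist, _ = np.histogram(lq[in_sideband], bins=bins)

    bkg_subtracted = peak_hist - sb_hist       # can go negative in sparse bins
    sigma_stat = np.sqrt(peak_hist + sb_hist)  # per-bin statistical uncertainty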
+def binned_lq_fit(
+ df: pd.DataFrame,
+ lq_param: str,
+ cal_energy_param: str,
+ peak: float,
+ cdf=pgf.gauss_cdf,
+ sidebands: bool = True,
+):
+ """Function for fitting a distribution of LQ values within a specified
+    energy peak. Fits a Gaussian to the distribution
+
+ Parameters
+ ----------
+    df: pd.DataFrame
+        Dataframe containing the data for fitting. Data must
+        contain the desired LQ parameter and the calibrated
+ energy
+ lq_param: string
+ Name of the LQ parameter to fit
+ cal_energy_param: string
+ Name of the calibrated energy parameter of choice
+ peak: float
+        Energy value, in keV, of the peak whose LQ
+ distribution will be fit
+ cdf: callable
+ Function to be used for the binned fit
+ sidebands: bool
+ Whether or not to perform a sideband subtraction when
+ fitting the LQ distribution
+
+ Returns
+ -------
+ m1.values: array-like object
+ Resulting parameter values from the peak fit
+ m1.errors: array-like object
+ Resulting parameter errors from the peak fit
+ hist: array
+ Histogram that was used for the binned fit
+ bins: array
+ Array of bin edges used for the binned fit
+ """
+
+ hist, bins, var = get_lq_hist(df, lq_param, cal_energy_param, peak, sidebands)
+
+ # Temporary fix for negative bin counts
+ # TODO: Adjust fitting to handle negative bin counts
+ hist[hist < 0] = 0
+
+ bin_centers = (bins[:-1] + bins[1:]) / 2
+
+ mu = bin_centers[np.argmax(hist)]
+ _, sigma, _ = pgh.get_gaussian_guess(hist, bins)
+
+ c1 = cost.BinnedNLL(hist, bins, pgf.gauss_cdf, verbose=0)
+ m1 = Minuit(c1, mu, sigma)
+ m1.simplex().migrad()
+ m1.hesse()
+
+ return m1.values, m1.errors, hist, bins
+
+
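The binned fit above is a plain Gaussian fit using iminuit's BinnedNLL cost. A rough, self-contained sketch of the same pattern, using scipy's normal CDF as a stand-in for pgf.gauss_cdf and synthetic data:

    import numpy as np
    from iminuit import Minuit, cost
    from scipy.stats import norm

    def gauss_cdf(x, mu, sigma):
        # stand-in for pgf.gauss_cdf: a plain Gaussian CDF with explicit parameters
        return norm.cdf(x, mu, sigma)

    rng = np.random.default_rng(1)
    lq_values = rng.normal(0.1, 0.25, 5_000)       # hypothetical LQ sample in the DEP
    hist, bins = np.histogram(lq_values, bins=100)

    c = cost.BinnedNLL(hist, bins, gauss_cdf)
    m = Minuit(c, mu=0.0, sigma=0.3)
    m.simplex().migrad()
    m.hesse()
    print(m.values["mu"], m.values["sigma"], m.errors["mu"])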
+def fit_time_means(tstamps, means, reses):
+ out_dict = {}
+ current_tstamps = []
+ current_means = []
+ current_reses = []
+
+ # Temporary fix
+ # TODO: Create better method of measuring time stability
+ rolling_mean = means[np.where(~np.isnan(means))[0][0]]
+ # rolling_mean = means[
+ # np.where(
+ # (np.abs(np.diff(means)) < (0.4 * np.array(reses)[1:]))
+ # & (~np.isnan(np.abs(np.diff(means)) < (0.4 * np.array(reses)[1:])))
+ # )[0][0]
+ # ]
+ for i, tstamp in enumerate(tstamps):
+ if (
+ (
+ (np.abs(means[i] - rolling_mean) > 0.4 * reses[i])
+ and (np.abs(means[i] - rolling_mean) > rolling_mean * 0.5)
+ )
+ or np.isnan(means[i])
+ or np.isnan(reses[i])
+ ):
+ if i + 1 == len(means):
+ out_dict[tstamp] = np.nan
+ else:
+ if (np.abs(means[i + 1] - means[i]) < 0.4 * reses[i + 1]) and not (
+ np.isnan(means[i])
+ or np.isnan(means[i + 1])
+ or np.isnan(reses[i])
+ or np.isnan(reses[i + 1])
+ ):
+ for ts in current_tstamps:
+ out_dict[ts] = rolling_mean
+ rolling_mean = means[i]
+ current_means = [means[i]]
+ current_tstamps = [tstamp]
+ current_reses = [reses[i]]
+ else:
+ out_dict[tstamp] = np.nan
+ else:
+ current_tstamps.append(tstamp)
+ current_means.append(means[i])
+ current_reses.append(reses[i])
+ rolling_mean = np.average(
+ current_means, weights=1 / np.array(current_reses)
+ )
+ for tstamp in current_tstamps:
+ out_dict[tstamp] = rolling_mean
+ return out_dict
+
+
+class cal_lq:
+
+ """A class for calibrating the LQ parameter and determining the LQ cut value"""
+
+ def __init__(
+ self,
+ cal_dicts: dict,
+ cal_energy_param: str,
+ eres_func: callable,
+ cdf: callable = pgf.gauss_cdf,
+ selection_string: str = "is_valid_cal&is_not_pulser",
+ plot_options: dict = {},
+ ):
+ """
+ Parameters
+ ----------
+ cal_dicts: dict
+ A dictionary containing the hit-level operations to apply
+ to the data.
+ cal_energy_param: string
+ The calibrated energy parameter of choice
+        eres_func: callable
+            The energy resolution function
+ cdf: callable
+ The CDF used for the binned fits
+ selection_string: string
+            A string of flags to apply to the data when running the calibration
+ plot_options: dict
+ A dict containing the plot functions the user wants to run,
+ and any user options to provide those plot functions
+ """
+
+ self.cal_dicts = cal_dicts
+ self.cal_energy_param = cal_energy_param
+ self.eres_func = eres_func
+ self.cdf = cdf
+ self.selection_string = selection_string
+ self.plot_options = plot_options
+
+ def update_cal_dicts(self, update_dict):
+ if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]):
+ for tstamp in self.cal_dicts:
+ if tstamp in update_dict:
+ self.cal_dicts[tstamp].update(update_dict[tstamp])
+ else:
+ self.cal_dicts[tstamp].update(update_dict)
+ else:
+ self.cal_dicts.update(update_dict)
+
+ def lq_timecorr(self, df, lq_param, output_name="LQ_Timecorr", display=0):
+ """
+        Calculates the average LQ value for DEP events for each specified
+        run_timestamp. Applies a time normalization based on the average LQ value
+ in the DEP across all run_timestamps.
+ """
+
+ log.info("Starting LQ time correction")
+ self.timecorr_df = pd.DataFrame(
+ columns=["run_timestamp", "mean", "mean_err", "res", "res_err"]
+ )
+ try:
+ if "run_timestamp" in df:
+ tstamps = sorted(np.unique(df["run_timestamp"]))
+ means = []
+ errors = []
+ reses = []
+ res_errs = []
+ final_tstamps = []
+ for tstamp, time_df in df.groupby("run_timestamp", sort=True):
+ try:
+ pars, errs, _, _ = binned_lq_fit(
+ time_df.query(f"{self.selection_string}"),
+ lq_param,
+ self.cal_energy_param,
+ peak=1592.5,
+ cdf=self.cdf,
+ sidebands=False,
+ )
+ self.timecorr_df = pd.concat(
+ [
+ self.timecorr_df,
+ pd.DataFrame(
+ [
+ {
+ "run_timestamp": tstamp,
+ "mean": pars["mu"],
+ "mean_err": errs["mu"],
+ "res": pars["sigma"] / pars["mu"],
+ "res_err": (pars["sigma"] / pars["mu"])
+ * np.sqrt(
+ errs["sigma"] / pars["sigma"]
+ + errs["mu"] / pars["mu"]
+ ),
+ }
+ ]
+ ),
+ ]
+ )
+ except:
+ self.timecorr_df = pd.concat(
+ [
+ self.timecorr_df,
+ pd.DataFrame(
+ [
+ {
+ "run_timestamp": tstamp,
+ "mean": np.nan,
+ "mean_err": np.nan,
+ "res": np.nan,
+ "res_err": np.nan,
+ }
+ ]
+ ),
+ ]
+ )
+ self.timecorr_df.set_index("run_timestamp", inplace=True)
+ time_dict = fit_time_means(
+ np.array(self.timecorr_df.index),
+ np.array(self.timecorr_df["mean"]),
+ np.array(self.timecorr_df["res"]),
+ )
+
+ df[output_name] = df[lq_param] / np.array(
+ [time_dict[tstamp] for tstamp in df["run_timestamp"]]
+ )
+ self.update_cal_dicts(
+ {
+ tstamp: {
+ output_name: {
+ "expression": f"{lq_param}/a",
+ "parameters": {"a": t_dict},
+ }
+ }
+ for tstamp, t_dict in time_dict.items()
+ }
+ )
+ log.info("LQ time correction finished")
+ else:
+ try:
+ pars, errs, _, _ = binned_lq_fit(
+ df.query(f"{self.selection_string}"),
+ lq_param,
+ self.cal_energy_param,
+ peak=1592.5,
+ cdf=self.cdf,
+ sidebands=False,
+ )
+ self.timecorr_df = pd.concat(
+ [
+ self.timecorr_df,
+ pd.DataFrame(
+ [
+ {
+ "mean": pars["mu"],
+ "mean_err": errs["mu"],
+ "res": pars["sigma"] / pars["mu"],
+ "res_err": (pars["sigma"] / pars["mu"])
+ * np.sqrt(
+ errs["sigma"] / pars["sigma"]
+ + errs["mu"] / pars["mu"]
+ ),
+ }
+ ]
+ ),
+ ]
+ )
+ except:
+ self.timecorr_df = pd.concat(
+ [
+ self.timecorr_df,
+ pd.DataFrame(
+ [
+ {
+ "mean": np.nan,
+ "mean_err": np.nan,
+ "res": np.nan,
+ "res_err": np.nan,
+ }
+ ]
+ ),
+ ]
+ )
+ df[output_name] = df[lq_param] / pars["mu"]
+ self.update_cal_dicts(
+ {
+ output_name: {
+ "expression": f"{lq_param}/a",
+ "parameters": {"a": pars["mu"]},
+ }
+ }
+ )
+ log.info("LQ time correction finished")
+ except:
+ log.error("LQ time correction failed")
+ self.update_cal_dicts(
+ {
+ output_name: {
+ "expression": f"{lq_param}/a",
+ "parameters": {"a": np.nan},
+ }
+ }
+ )
+
+ def drift_time_correction(
+        self, df: pd.DataFrame, lq_param, cal_energy_param: str, display: int = 0
+ ):
+ """
+        Determines the drift time correction parameters for LQ by fitting a degree-1 polynomial to
+        the LQ vs. drift time distribution for DEP events. Corrects for any linear dependence and
+        centers the final LQ distribution at a mean of 0.
+ """
+
+ log.info("Starting LQ drift time correction")
+ try:
+ dt_dict = {}
+ pars = binned_lq_fit(df, lq_param, self.cal_energy_param, peak=1592.5)[0]
+ mean = pars[0]
+ sigma = pars[1]
+
+ lq_mask = (df[lq_param] < (2 * sigma + mean)) & (
+ df[lq_param] > (mean - 2 * sigma)
+ )
+ dep_mask = (df[cal_energy_param] < 1595) & (df[cal_energy_param] > 1590)
+
+ ids = np.isnan(df[lq_param]) | np.isnan(df["dt_eff"])
+ result = linregress(
+ df["dt_eff"][~ids & dep_mask & lq_mask],
+ df[lq_param][~ids & dep_mask & lq_mask],
+ alternative="greater",
+ )
+ self.dt_fit_pars = result
+
+ df["LQ_Classifier"] = (
+ df[lq_param] - df["dt_eff"] * self.dt_fit_pars[0] - self.dt_fit_pars[1]
+ )
+
+ except:
+ log.error("LQ drift time correction failed")
+ self.dt_fit_pars = (np.nan, np.nan)
+
+ self.update_cal_dicts(
+ {
+ "LQ_Classifier": {
+ "expression": f"{lq_param} - dt_eff*a - b",
+ "parameters": {"a": self.dt_fit_pars[0], "b": self.dt_fit_pars[1]},
+ }
+ }
+ )
+
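A minimal sketch of the correction above on synthetic data (array names are made up; the real routine additionally restricts to DEP events within 2 sigma of the fitted LQ mean before fitting):

    import numpy as np
    from scipy.stats import linregress

    rng = np.random.default_rng(2)
    dt_eff = rng.uniform(200, 1200, 20_000)                 # hypothetical drift times (ns)
    lq = 1e-4 * dt_eff + rng.normal(0.0, 0.1, dt_eff.size)  # LQ with a linear drift-time trend

    fit = linregress(dt_eff, lq)                # slope and intercept of LQ vs drift time
    lq_classifier = lq - dt_eff * fit.slope - fit.intercept
    print(fit.slope, fit.intercept, lq_classifier.mean())   # mean is now ~0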
+    def get_cut_lq_dep(self, df: pd.DataFrame, lq_param: str, cal_energy_param: str):
+ """
+ Determines the cut value for LQ. Value is calculated by fitting the LQ distribution
+        for events in the DEP to a Gaussian. The cut value is set at 3*sigma of the fit.
+ Sideband subtraction is used to determine the LQ distribution for DEP events.
+ Events greater than the cut value fail the cut.
+ """
+
+ log.info("Starting LQ Cut calculation")
+ try:
+ pars, errs, hist, bins = binned_lq_fit(
+ df, "LQ_Classifier", cal_energy_param, peak=1592.5
+ )
+ cut_val = 3 * pars[1]
+
+ self.cut_fit_pars = pars
+ self.cut_fit_errs = errs
+ self.fit_hist = (hist, bins)
+ self.cut_val = cut_val
+
+ df["LQ_Cut"] = df[lq_param] < self.cut_val
+
+ except:
+ log.error("LQ cut determination failed")
+ self.cut_val = np.nan
+
+ self.update_cal_dicts(
+ {
+ "LQ_Cut": {
+ "expression": f"({lq_param} < a)",
+ "parameters": {"a": self.cut_val},
+ }
+ }
+ )
+
+ def get_results_dict(self):
+ return {
+ "cal_energy_param": self.cal_energy_param,
+ "rt_correction": self.dt_fit_pars,
+ "cdf": self.cdf.__name__,
+ "1590-1596keV": self.timecorr_df.to_dict("index"),
+ "cut_value": self.cut_val,
+ "sfs": self.low_side_sf.to_dict("index"),
+ }
+
+ def fill_plot_dict(self, data, plot_dict={}):
+ for key, item in self.plot_options.items():
+ if item["options"] is not None:
+ plot_dict[key] = item["function"](self, data, **item["options"])
+ else:
+ plot_dict[key] = item["function"](self, data)
+ return plot_dict
+
+ def calibrate(self, df, initial_lq_param):
+ """Run the LQ calibration and calculate the cut value"""
+
+ self.lq_timecorr(df, lq_param="LQ_Ecorr")
+ log.info("Finished LQ Time Correction")
+
+ self.drift_time_correction(
+ df, lq_param="LQ_Timecorr", cal_energy_param=self.cal_energy_param
+ )
+ log.info("Finished LQ Drift Time Correction")
+
+ self.get_cut_lq_dep(
+ df, lq_param="LQ_Classifier", cal_energy_param=self.cal_energy_param
+ )
+ log.info("Finished Calculating the LQ Cut Value")
+
+ final_lq_param = "LQ_Classifier"
+ peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50]
+ self.low_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"])
+ fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)]
+ self.low_side_peak_dfs = {}
+
+ log.info("Calculating peak survival fractions")
+ for i, peak in enumerate(peaks_of_interest):
+ try:
+ select_df = df.query(f"{self.selection_string}")
+ fwhm = self.eres_func(peak)
+ if peak == 2039:
+ emin = 2 * fwhm
+ emax = 2 * fwhm
+ peak_df = select_df.query(
+ f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})"
+ )
+
+ cut_df, sf, sf_err = aoe.compton_sf_sweep(
+ peak_df[self.cal_energy_param].to_numpy(),
+ peak_df[final_lq_param].to_numpy(),
+ self.cut_val,
+ peak,
+ fwhm,
+ cut_range=(0, 0.6),
+ mode="less",
+ )
+ self.low_side_sf = pd.concat(
+ [
+ self.low_side_sf,
+ pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]),
+ ]
+ )
+ self.low_side_peak_dfs[peak] = cut_df
+ else:
+ emin, emax = fit_widths[i]
+ peak_df = select_df.query(
+ f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})"
+ )
+ cut_df, sf, sf_err = aoe.get_sf_sweep(
+ peak_df[self.cal_energy_param].to_numpy(),
+ peak_df[final_lq_param].to_numpy(),
+ self.cut_val,
+ peak,
+ fwhm,
+ cut_range=(0, 0.6),
+ mode="less",
+ )
+ self.low_side_sf = pd.concat(
+ [
+ self.low_side_sf,
+ pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]),
+ ]
+ )
+ self.low_side_peak_dfs[peak] = cut_df
+ log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %")
+ except:
+ self.low_side_sf = pd.concat(
+ [
+ self.low_side_sf,
+ pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]),
+ ]
+ )
+ log.error(f"LQ Survival fraction determination failed for {peak} peak")
+ self.low_side_sf.set_index("peak", inplace=True)
+
+
+def plot_lq_mean_time(
+ lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12
+) -> plt.figure:
+ """Plots the mean LQ value calculated for each given timestamp"""
+
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+ fig, ax = plt.subplots(1, 1)
+ # try:
+ ax.errorbar(
+ [
+ datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ")
+ for tstamp in lq_class.timecorr_df.index
+ ],
+ lq_class.timecorr_df["mean"],
+ yerr=lq_class.timecorr_df["mean_err"],
+ linestyle=" ",
+ )
+
+ grouped_means = [
+ cal_dict["LQ_Timecorr"]["parameters"]["a"]
+ for tstamp, cal_dict in lq_class.cal_dicts.items()
+ ]
+ ax.step(
+ [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in lq_class.cal_dicts],
+ grouped_means,
+ where="post",
+ )
+ ax.fill_between(
+ [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in lq_class.cal_dicts],
+ y1=np.array(grouped_means) - 0.2 * np.array(lq_class.timecorr_df["res"]),
+ y2=np.array(grouped_means) + 0.2 * np.array(lq_class.timecorr_df["res"]),
+ color="green",
+ alpha=0.2,
+ )
+ ax.fill_between(
+ [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in lq_class.cal_dicts],
+ y1=np.array(grouped_means) - 0.4 * np.array(lq_class.timecorr_df["res"]),
+ y2=np.array(grouped_means) + 0.4 * np.array(lq_class.timecorr_df["res"]),
+ color="yellow",
+ alpha=0.2,
+ )
+ # except:
+ # pass
+ ax.set_xlabel("time")
+ ax.set_ylabel("LQ mean")
+ myFmt = mdates.DateFormatter("%b %d")
+ ax.xaxis.set_major_formatter(myFmt)
+ plt.close()
+ return fig
+
+
+def plot_drift_time_correction(
+ lq_class, data, lq_param="LQ_Timecorr", figsize=[12, 8], fontsize=12
+) -> plt.figure:
+ """Plots a 2D histogram of LQ versus effective drift time in a 6 keV
+ window around the DEP. Additionally plots the fit results for the
+ drift time correction."""
+
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+ fig, ax = plt.subplots(1, 1)
+
+ try:
+ dep_range = (1590, 1595)
+
+ initial_df = data[
+ (data[lq_class.cal_energy_param] > dep_range[0])
+ & (data[lq_class.cal_energy_param] < dep_range[1])
+ ]
+ max_dt = 1500
+ max_lq = 2.5
+
+ plt.hist2d(
+ initial_df["dt_eff"],
+ initial_df[lq_param],
+ bins=100,
+ range=((0, max_dt), (0, max_lq)),
+ norm=mcolors.LogNorm(),
+ )
+
+ x = np.linspace(0, max_dt, 100)
+ model = lq_class.dt_fit_pars[0] * x + lq_class.dt_fit_pars[1]
+
+ plt.plot(x, model, color="r")
+
+ plt.xlabel("Drift Time (ns)")
+ plt.ylabel("LQ")
+
+ plt.title("LQ versus Drift Time for DEP")
+
+ except:
+ pass
+
+ plt.tight_layout()
+ plt.close()
+ return fig
+
+
+def plot_lq_cut_fit(lq_class, data, figsize=[12, 8], fontsize=12) -> plt.figure:
+ """Plots the final histogram of LQ values for events in the
+ DEP, and the fit results used for determining the cut
+ value"""
+
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+ fig, (ax1, ax2) = plt.subplots(2, 1)
+
+ try:
+ hist, bins = lq_class.fit_hist
+ fit_pars = lq_class.cut_fit_pars
+
+ ax1.stairs(hist, bins, label="data")
+ xs = np.linspace(round(bins[0], 3), round(bins[-1], 3), len(bins) - 1)
+ ls = np.sum(hist)
+ dx = np.diff(bins)
+ ax1.plot(
+ xs,
+ pgf.gauss_pdf(xs, fit_pars[0], fit_pars[1], ls) * dx,
+ label="Gaussian Fit",
+ )
+
+ # ax1.set_xlabel('LQ')
+ ax1.set_title("Fit of LQ events in DEP")
+ ax1.legend()
+
+ bin_centers = (bins[:-1] + bins[1:]) / 2
+ reses = (
+ hist - (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls) * dx)
+ ) / (pgf.gauss_pdf(bin_centers, fit_pars[0], fit_pars[1], ls) * dx)
+ ax2.plot(bin_centers, reses, marker="s", linestyle="")
+ ax2.set_xlabel("LQ")
+ ax2.set_ylabel("residuals")
+
+ except:
+ pass
+
+ plt.tight_layout()
+ plt.close()
+ return fig
+
+
+def plot_survival_fraction_curves(
+ lq_class, data, figsize=[12, 8], fontsize=12
+) -> plt.figure:
+ """Plots the survival fraction curves as a function of
+ LQ cut values for every peak of interest"""
+
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+
+ fig = plt.figure()
+ try:
+ plt.vlines(
+ lq_class.cut_val,
+ 0,
+ 100,
+ label=f"cut value: {lq_class.cut_val:1.2f}",
+ color="black",
+ )
+
+ for peak, survival_df in lq_class.low_side_peak_dfs.items():
+ try:
+ plt.errorbar(
+ survival_df.index,
+ survival_df["sf"],
+ yerr=survival_df["sf_err"],
+ label=f'{aoe.get_peak_label(peak)} {peak} keV: {lq_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {lq_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %',
+ )
+ except:
+ pass
+ except:
+ pass
+ vals, labels = plt.yticks()
+ plt.yticks(vals, [f"{x:,.0f} %" for x in vals])
+ plt.legend(loc="lower right")
+ plt.xlabel("cut value")
+ plt.ylabel("survival percentage")
+ plt.ylim([0, 105])
+ plt.close()
+ return fig
+
+
+def plot_sf_vs_energy(
+ lq_class, data, xrange=(900, 3000), n_bins=701, figsize=[12, 8], fontsize=12
+) -> plt.figure:
+ """Plots the survival fraction as a function of energy"""
+
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+
+ fig = plt.figure()
+ try:
+ bins = np.linspace(xrange[0], xrange[1], n_bins)
+ counts_pass, bins_pass, _ = pgh.get_hist(
+ data.query(f"{lq_class.selection_string}&LQ_Cut")[
+ lq_class.cal_energy_param
+ ],
+ bins=bins,
+ )
+ counts, bins, _ = pgh.get_hist(
+ data.query(lq_class.selection_string)[lq_class.cal_energy_param],
+ bins=bins,
+ )
+ survival_fracs = counts_pass / (counts + 10**-99)
+
+ plt.step(pgh.get_bin_centers(bins_pass), 100 * survival_fracs)
+ except:
+ pass
+ plt.ylim([0, 100])
+ vals, labels = plt.yticks()
+ plt.yticks(vals, [f"{x:,.0f} %" for x in vals])
+ plt.xlabel("energy (keV)")
+ plt.ylabel("survival percentage")
+ plt.close()
+ return fig
+
+
+def plot_spectra(
+ lq_class,
+ data,
+ xrange=(900, 3000),
+ n_bins=2101,
+ xrange_inset=(1580, 1640),
+ n_bins_inset=200,
+ figsize=[12, 8],
+ fontsize=12,
+) -> plt.figure:
+ """Plots a 2D histogram of the LQ classifier vs calibrated energy"""
+
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+
+ fig, ax = plt.subplots()
+ try:
+ bins = np.linspace(xrange[0], xrange[1], n_bins)
+ ax.hist(
+ data.query(lq_class.selection_string)[lq_class.cal_energy_param],
+ bins=bins,
+ histtype="step",
+ label="before PSD",
+ )
+ # ax.hist(
+ # data.query(f"{lq_class.selection_string}&AoE_Double_Sided_Cut")[
+ # lq_class.cal_energy_param
+ # ],
+ # bins=bins,
+ # histtype="step",
+ # label="after double sided A/E cut",
+ # )
+ ax.hist(
+ data.query(f"{lq_class.selection_string}&LQ_Cut")[
+ lq_class.cal_energy_param
+ ],
+ bins=bins,
+ histtype="step",
+ label="after LQ cut",
+ )
+ ax.hist(
+ data.query(f"{lq_class.selection_string} & (~LQ_Cut)")[
+ lq_class.cal_energy_param
+ ],
+ bins=bins,
+ histtype="step",
+ label="rejected by LQ cut",
+ )
+
+ axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3])
+ bins = np.linspace(xrange_inset[0], xrange_inset[1], n_bins_inset)
+ select_df = data.query(
+ f"{lq_class.cal_energy_param}<{xrange_inset[1]}&{lq_class.cal_energy_param}>{xrange_inset[0]}"
+ )
+ axins.hist(
+ select_df.query(lq_class.selection_string)[lq_class.cal_energy_param],
+ bins=bins,
+ histtype="step",
+ )
+ # axins.hist(
+ # select_df.query(f"{lq_class.selection_string}&AoE_Double_Sided_Cut")[
+ # lq_class.cal_energy_param
+ # ],
+ # bins=bins,
+ # histtype="step",
+ # )
+ axins.hist(
+ select_df.query(f"{lq_class.selection_string}&LQ_Cut")[
+ lq_class.cal_energy_param
+ ],
+ bins=bins,
+ histtype="step",
+ )
+ axins.hist(
+ select_df.query(f"{lq_class.selection_string} & (~LQ_Cut)")[
+ lq_class.cal_energy_param
+ ],
+ bins=bins,
+ histtype="step",
+ )
+ except:
+ pass
+ ax.set_xlim(xrange)
+ ax.set_yscale("log")
+ plt.xlabel("energy (keV)")
+ plt.ylabel("counts")
+ plt.legend(loc="upper left")
+ plt.close()
+ return fig
+
+
+def plot_classifier(
+ lq_class,
+ data,
+ lq_param="LQ_Classifier",
+ xrange=(800, 3000),
+ yrange=(-2, 8),
+ xn_bins=700,
+ yn_bins=500,
+ figsize=[12, 8],
+ fontsize=12,
+) -> plt.figure:
+ plt.rcParams["figure.figsize"] = figsize
+ plt.rcParams["font.size"] = fontsize
+
+ fig = plt.figure()
+ try:
+ plt.hist2d(
+ data.query(lq_class.selection_string)[lq_class.cal_energy_param],
+ data.query(lq_class.selection_string)[lq_param],
+ bins=[
+ np.linspace(xrange[0], xrange[1], xn_bins),
+ np.linspace(yrange[0], yrange[1], yn_bins),
+ ],
+ norm=LogNorm(),
+ )
+ except:
+ pass
+ plt.xlabel("energy (keV)")
+ plt.ylabel(lq_param)
+ plt.xlim(xrange)
+ plt.ylim(yrange)
+ plt.close()
+ return fig
diff --git a/src/pygama/pargen/mse_psd.py b/src/pygama/pargen/mse_psd.py
index 9b158f8b5..11a78f65b 100644
--- a/src/pygama/pargen/mse_psd.py
+++ b/src/pygama/pargen/mse_psd.py
@@ -8,8 +8,8 @@
import numpy as np
from matplotlib.colors import LogNorm
-from pygama.math.distributions import *
from pygama.math.histogram import get_bin_centers
+from pygama.math.peak_fitting import *
def get_avse_cut(e_cal, current, plotFigure=None):
@@ -59,17 +59,17 @@ def get_avse_cut(e_cal, current, plotFigure=None):
p0 = get_gaussian_guess(h, a_bins_cent)
fit_idxs = a_bins_cent > p0[0] - 5 * p0[1]
- p = fit_binned(nb_gauss, h[fit_idxs], a_bins_cent[fit_idxs], p0)
+ p = fit_binned(gauss, h[fit_idxs], a_bins_cent[fit_idxs], p0)
y_max[i] = p[0]
# plt.plot(a_bins_cent,h,ls="steps")
# plt.axvline(a_mode, c="r")
# plt.title("Energy: {} keV".format(e_cent[i]))
#
- # fit = nb_gauss(a_bins_cent[fit_idxs], *p)
+ # fit = gauss(a_bins_cent[fit_idxs], *p)
# plt.plot(a_bins_cent[fit_idxs], fit, c="g")
- # guess = nb_gauss(a_bins_cent[fit_idxs], *p0)
+ # guess = gauss(a_bins_cent[fit_idxs], *p0)
# plt.plot(a_bins_cent[fit_idxs], guess, c="r")
# inp = input("q to quit")
@@ -99,8 +99,8 @@ def get_avse_cut(e_cal, current, plotFigure=None):
h_bgs = h_dep - h_bg
# fit AvsE peak to gaussian to get the 90% cut
p0 = get_gaussian_guess(h_bgs, bin_centers)
- p = fit_binned(nb_gauss, h_bgs, bin_centers, p0)
- fit = nb_gauss(bin_centers, *p)
+ p = fit_binned(gauss, h_bgs, bin_centers, p0)
+ fit = gauss(bin_centers, *p)
ae_mean, ae_std = p[0], p[1]
ae_cut = p[0] - 1.28 * p[1] # cuts at 10% of CDF
@@ -250,8 +250,8 @@ def get_ae_cut(e_cal, current, plotFigure=None):
h_bgs = h_dep - h_bg
p0 = get_gaussian_guess(h_bgs, bin_centers)
- p = fit_binned(nb_gauss, h_bgs, bin_centers, p0)
- fit = nb_gauss(bin_centers, *p)
+ p = fit_binned(gauss, h_bgs, bin_centers, p0)
+ fit = gauss(bin_centers, *p)
ae_mean, ae_std = p[0], p[1]
ae_cut = p[0] - 1.28 * p[1] # cuts at 10% of CDF
diff --git a/src/pygama/pargen/noise_optimization.py b/src/pygama/pargen/noise_optimization.py
new file mode 100644
index 000000000..96c2ce65b
--- /dev/null
+++ b/src/pygama/pargen/noise_optimization.py
@@ -0,0 +1,389 @@
+"""
+This module contains the functions for performing the filter optimisation.
+This happens with a grid search performed on the ENC peak.
+"""
+
+import inspect
+import json
+import logging
+import os
+import pathlib
+import pickle as pkl
+import sys
+import time
+from collections import namedtuple
+
+import lgdo
+import matplotlib as mpl
+
+mpl.use("agg")
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import scipy.stats
+from iminuit import Minuit, cost, util
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib.colors import LogNorm
+from scipy.interpolate import splev, splrep
+from scipy.optimize import minimize
+
+import pygama.math.peak_fitting as pgf
+from pygama.math.histogram import get_hist
+from pygama.pargen.cuts import generate_cuts, get_cut_indexes
+from pygama.pargen.dsp_optimize import run_one_dsp
+from pygama.pargen.energy_optimisation import index_data
+
+log = logging.getLogger(__name__)
+
+
+def noise_optimization(
+ tb_data: lgdo.Table,
+ dsp_proc_chain: dict,
+ par_dsp: dict,
+ opt_dict: dict,
+ lh5_path: str,
+ verbose: bool = False,
+ display: int = 0,
+) -> dict:
+ """
+    This function calculates the optimal filter parameter.
+    Parameters
+    ----------
+    tb_data : lgdo.Table
+        raw table to run the macro on
+    dsp_proc_chain: dict
+        minimal dsp config defining the processing chain
+    par_dsp: dict
+        Dictionary with default dsp parameters
+    opt_dict: dict
+        Dictionary with parameters for optimization
+    lh5_path: str
+        Name of the channel to process; should be the name of the lh5 group in the raw files
+ Returns
+ -------
+ res_dict : dict
+ """
+
+ t0 = time.time()
+
+ samples = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step"])
+ samples_val = np.arange(opt_dict["start"], opt_dict["stop"], opt_dict["step_val"])
+
+ opt_dict_par = opt_dict["optimization"]
+
+ res_dict = {}
+ if display > 0:
+ dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp)
+ psd = np.mean(dsp_data["wf_psd"].values.nda, axis=0)
+ sample_us = float(dsp_data["wf_presum"].dt.nda[0]) / 1000
+ freq = np.linspace(0, (1 / sample_us) / 2, len(psd))
+ fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white")
+ ax.plot(freq, psd)
+ ax.set_xscale("log")
+ ax.set_yscale("log")
+ ax.set_xlabel("frequency (MHz)")
+ ax.set_ylabel(f"power spectral density")
+
+ plot_dict = {}
+ plot_dict["nopt"] = {"fft": {"frequency": freq, "psd": psd, "fig": fig}}
+ plt.close()
+
+ result_dict = {}
+ ene_pars = [par for par in opt_dict_par.keys()]
+ log.info(f"\nRunning optimization for {ene_pars}")
+ if verbose:
+ print(f"\nRunning optimization for {ene_pars}")
+ for i, x in enumerate(samples):
+ x = f"{x:.1f}"
+ log.info(f"\nCase {i}, par = {x} us")
+ if verbose:
+ print(f"\nCase {i}, par = {x} us")
+ for ene_par in ene_pars:
+ dict_str = opt_dict_par[ene_par]["dict_str"]
+ filter_par = opt_dict_par[ene_par]["filter_par"]
+ if dict_str in par_dsp:
+ par_dsp[dict_str].update({filter_par: f"{x}*us"})
+ else:
+ par_dsp[dict_str] = {filter_par: f"{x}*us"}
+
+ t1 = time.time()
+ dsp_data = run_one_dsp(tb_data, dsp_proc_chain, db_dict=par_dsp)
+ log.info(f"Time to process dsp data {time.time()-t1:.2f} s")
+ if verbose:
+ print(f"Time to process dsp data {time.time()-t1:.2f} s")
+
+ for ene_par in ene_pars:
+ dict_str = opt_dict_par[ene_par]["dict_str"]
+ ene_str = opt_dict_par[ene_par]["ene_str"]
+ if dict_str not in result_dict:
+ result_dict[dict_str] = {}
+ par_dict_res = result_dict[dict_str]
+
+ energies = dsp_data[ene_str].nda
+
+ if opt_dict["perform_fit"]:
+ fom_results = simple_gaussian_fit(energies, dx=opt_dict["dx"])
+ else:
+ fom_results = calculate_spread(
+ energies,
+ opt_dict["percentile_low"],
+ opt_dict["percentile_high"],
+ opt_dict["n_bootstrap_samples"],
+ )
+
+ par_dict_res[x] = {}
+ par_dict_res[x]["energies"] = energies
+ par_dict_res[x]["fom"] = fom_results["fom"]
+ par_dict_res[x]["fom_err"] = fom_results["fom_err"]
+
+ for ene_par in ene_pars:
+ log.info(f"\nOptimization for {ene_par}")
+ if verbose:
+ print(f"\nOptimization for {ene_par}")
+ dict_str = opt_dict_par[ene_par]["dict_str"]
+ par_dict_res = result_dict[dict_str]
+ sample_list = np.array([float(x) for x in result_dict[dict_str].keys()])
+ fom_list = np.array(
+ [result_dict[dict_str][x]["fom"] for x in result_dict[dict_str].keys()]
+ )
+ fom_err_list = np.array(
+ [result_dict[dict_str][x]["fom_err"] for x in result_dict[dict_str].keys()]
+ )
+
+ guess_par = sample_list[np.nanargmin(fom_list)]
+ if verbose:
+ print(f"guess par: {guess_par:.2f} us")
+
+ tck = splrep(sample_list, fom_list, k=opt_dict["fit_deg"])
+
+ def spl_func(x_val):
+ return splev(x_val, tck)
+
+ result = minimize(spl_func, guess_par)
+ best_par = result.x[0]
+ if (best_par < np.min(sample_list)) or (best_par > np.max(sample_list)):
+ log.info(
+ f"Par from minimization not accepted {best_par:.2f}, setting par to guess"
+ )
+ if verbose:
+ print(
+ f"Par from minimization not accepted {best_par:.2f}, setting par to guess"
+ )
+ best_par = guess_par
+
+ best_val = spl_func(best_par)
+
+ b_best_pars = np.zeros(opt_dict["n_bootstrap_samples"])
+ for i in range(opt_dict["n_bootstrap_samples"]):
+ indices = np.random.choice(len(sample_list), len(sample_list), replace=True)
+ b_sample_list = sample_list[indices]
+ b_fom_list = fom_list[indices]
+ b_best_pars[i] = b_sample_list[np.nanargmin(b_fom_list)]
+ best_par_err = np.std(b_best_pars)
+ log.info(f"best par: {best_par:.2f} ± {best_par_err:.2f} us")
+ if verbose:
+ print(f"best par: {best_par:.2f} ± {best_par_err:.2f} us")
+
+ par_dict_res["best_par"] = best_par
+ par_dict_res["best_par_err"] = best_par_err
+ par_dict_res["best_val"] = best_val
+
+ filter_par = opt_dict_par[ene_par]["filter_par"]
+ res_dict[dict_str] = {
+ filter_par: f"{best_par:.2f}*us",
+ f"{filter_par}_err": f"{best_par_err:.2f}*us",
+ }
+
+ if display > 0:
+ plot_range = opt_dict["plot_range"]
+ fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white")
+ for i, x in enumerate(sample_list):
+ x = f"{x:.1f}"
+ energies = par_dict_res[x]["energies"]
+ par_dict_res[x].pop("energies")
+ hist, bins, var = get_hist(
+ energies, range=plot_range, dx=opt_dict["dx"]
+ )
+ bc = (bins[:-1] + bins[1:]) / 2.0
+ string_res = (
+ f"par = {x} us, FOM = {fom_list[i]:.3f} ± {fom_err_list[i]:.3f} ADC"
+ )
+ ax.plot(bc, hist, ds="steps", label=string_res)
+ log.info(string_res)
+ if verbose:
+ print(string_res)
+ ax.set_xlabel("energy (ADC)")
+ ax.set_ylabel("counts")
+ ax.legend(loc="upper right")
+ par_dict_res["distribution"] = fig
+ if display > 1:
+ plt.show()
+ else:
+ plt.close()
+
+ fig, ax = plt.subplots(figsize=(12, 6.75), facecolor="white")
+ ax.errorbar(
+ sample_list,
+ fom_list,
+ yerr=fom_err_list,
+ color="b",
+ fmt="x",
+ ms=4,
+ ls="",
+ capsize=4,
+ label="samples",
+ )
+ ax.plot(samples_val, spl_func(samples_val), "k:", label="fit")
+ ax.errorbar(
+ best_par,
+ best_val,
+ xerr=best_par_err,
+ color="r",
+ fmt="o",
+ ms=6,
+ ls="",
+ capsize=4,
+ label=rf"best par: {best_par:.2f} ± {best_par_err:.2f} $\mu$s",
+ )
+ ax.set_xlabel(rf"{ene_par} parameter ($\mu$s)")
+ ax.set_ylabel("FOM (ADC)")
+ ax.legend()
+ if display > 1:
+ plt.show()
+ else:
+ plt.close()
+ par_dict_res["optimization"] = fig
+ plot_dict["nopt"][dict_str] = par_dict_res
+
+ log.info(f"Time to complete the optimization {time.time()-t0:.2f} s")
+ if verbose:
+ print(f"Time to complete the optimization {time.time()-t0:.2f} s")
+ if display > 0:
+ return res_dict, plot_dict
+ else:
+ return res_dict
+
+
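For orientation, an opt_dict assembled purely from the keys noise_optimization() reads above might look like the following; all values and the "cusp" entry name are illustrative placeholders, not shipped defaults:

    opt_dict = {
        "start": 1.0,                # first filter-parameter value of the scan (us)
        "stop": 12.0,                # end of the scan (exclusive)
        "step": 1.0,                 # grid spacing of the scan
        "step_val": 0.1,             # finer spacing used to evaluate the spline
        "perform_fit": False,        # True: Gaussian-fit FOM, False: bootstrap spread
        "dx": 1,                     # histogram bin width (ADC)
        "percentile_low": 10,
        "percentile_high": 90,
        "n_bootstrap_samples": 100,
        "fit_deg": 3,                # spline degree passed to scipy.interpolate.splrep
        "plot_range": (-100, 100),   # energy range of the diagnostic histograms
        "optimization": {
            "cusp": {
                "dict_str": "cusp",        # key in par_dsp to update
                "filter_par": "flat",      # DSP parameter that gets set to "<x>*us"
                "ene_str": "cuspEmax",     # DSP output column used as the energy
            }
        },
    }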
+def calculate_spread(energies, percentile_low, percentile_high, n_samples):
+ spreads = np.zeros(n_samples)
+ for i in range(n_samples):
+ resampled = np.random.choice(energies, size=len(energies), replace=True)
+ spread = np.percentile(resampled, percentile_high) - np.percentile(
+ resampled, percentile_low
+ )
+ spreads[i] = spread
+
+ mean_spread = np.mean(spreads)
+ std_spread = np.std(spreads, ddof=1) / np.sqrt(n_samples)
+
+ results = {}
+ results["fom"] = mean_spread
+ results["fom_err"] = std_spread
+ return results
+
+
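As a sanity check of what this figure of merit measures: for a Gaussian the 10 to 90 interpercentile spread is about 2.56 sigma, so on synthetic data one expects roughly (illustrative only):

    import numpy as np

    rng = np.random.default_rng(3)
    energies = rng.normal(1000, 5, 10_000)   # hypothetical ENC peak, sigma = 5 ADC
    res = calculate_spread(energies, percentile_low=10, percentile_high=90, n_samples=100)
    print(res["fom"], res["fom_err"])        # fom ~ 2.56 * 5 ~ 12.8 ADC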
+def simple_gaussian_fit(energies, dx=1, sigma_thr=4, allowed_p_val=1e-20):
+ fit_range = [np.percentile(energies, 0.2), np.percentile(energies, 99.8)]
+
+ hist, bins, var = get_hist(energies, range=fit_range, dx=dx)
+ guess, bounds = simple_gaussian_guess(hist, bins, pgf.extended_gauss_pdf)
+ fit_range = [guess[0] - sigma_thr * guess[1], guess[0] + sigma_thr * guess[1]]
+
+ energies_fit = energies[(energies > fit_range[0]) & (energies < fit_range[1])]
+ pars, errs, cov = pgf.fit_unbinned(
+ pgf.extended_gauss_pdf,
+ energies_fit,
+ guess=guess,
+ bounds=bounds,
+ )
+
+ mu, mu_err = pars[0], errs[0]
+ fwhm = pars[1] * 2 * np.sqrt(2 * np.log(2))
+ fwhm_err = errs[1] * 2 * np.sqrt(2 * np.log(2))
+
+ hist, bins, var = get_hist(energies_fit, range=fit_range, dx=dx)
+ gof_pars = pars
+ gof_pars[2] *= dx
+ chisq, dof = pgf.goodness_of_fit(
+ hist, bins, None, pgf.gauss_pdf, gof_pars, method="Pearson"
+ )
+ p_val = scipy.stats.chi2.sf(chisq, dof + len(gof_pars))
+
+ if (
+ sum(sum(c) if c is not None else 0 for c in cov[:3, :][:, :3]) == np.inf
+ or sum(sum(c) if c is not None else 0 for c in cov[:3, :][:, :3]) == 0
+ or np.isnan(sum(sum(c) if c is not None else 0 for c in cov[:3, :][:, :3]))
+ ):
+ log.debug("fit failed, cov estimation failed")
+ fit_failed = True
+ elif (np.abs(np.array(errs)[:3] / np.array(pars)[:3]) < 1e-7).any() or np.isnan(
+ np.array(errs)[:3]
+ ).any():
+ log.debug("fit failed, parameter error too low")
+ fit_failed = True
+ elif p_val < allowed_p_val or np.isnan(p_val):
+ log.debug("fit failed, parameter error too low")
+ fit_failed = True
+ else:
+ fit_failed = False
+
+ if fit_failed:
+ log.debug(f"Returning values from guess")
+ mu = guess[0]
+ mu_err = 0
+ fwhm = guess[1] * 2 * np.sqrt(2 * np.log(2))
+ fwhm_err = 0
+
+ results = {
+ "pars": pars[:3],
+ "errors": errs[:3],
+ "covariance": cov[:3],
+ "mu": mu,
+ "mu_err": mu_err,
+ "fom": fwhm,
+ "fom_err": fwhm_err,
+ "chisq": chisq / dof,
+ "p_val": p_val,
+ }
+ return results
+
+
+def simple_gaussian_guess(hist, bins, func, toll=0.2):
+ max_idx = np.argmax(hist)
+ mu = bins[max_idx]
+ max_amp = np.max(hist)
+
+ idx = np.where(hist > max_amp / 2)
+ ilo, ihi = idx[0][0], idx[0][-1]
+
+ sigma = (bins[ihi] - bins[ilo]) / 2.355
+
+ if sigma == 0:
+ log.debug("error in sigma evaluation, using 2*(bin width) as sigma")
+ sigma = 2 * (bins[1] - bins[0])
+
+ dx = np.diff(bins)[0]
+ n_bins_range = int((4 * sigma) // dx)
+
+ min_idx = max_idx - n_bins_range
+ max_idx = max_idx + n_bins_range
+ min_idx = max(0, min_idx)
+ max_idx = min(len(hist), max_idx)
+
+ n_sig = np.sum(hist[min_idx:max_idx])
+
+ guess = [mu, sigma, n_sig]
+ bounds = [
+ (mu - sigma, mu + sigma),
+ (sigma - sigma * toll, sigma + sigma * toll),
+        (n_sig - n_sig * toll, n_sig + n_sig * toll),
+ ]
+
+ for i, par in enumerate(inspect.getfullargspec(func)[0][1:]):
+ if par == "lower_range" or par == "upper_range":
+ guess.append(np.inf)
+ bounds.append(None)
+ elif par == "n_bkg" or par == "hstep" or par == "components":
+ guess.append(0)
+ bounds.append(None)
+ return guess, bounds
diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py
index e6c9f3c75..a1ec229ab 100644
--- a/src/pygama/pargen/utils.py
+++ b/src/pygama/pargen/utils.py
@@ -3,12 +3,13 @@
import logging
from types import FunctionType
-import lgdo.lh5_store as lh5
import numpy as np
import pandas as pd
from iminuit import Minuit, cost, util
+from lgdo import Table, lh5
log = logging.getLogger(__name__)
+sto = lh5.LH5Store()
def return_nans(input):
@@ -50,35 +51,64 @@ def load_data(
Loads in the A/E parameters needed and applies calibration constants to energy
"""
- sto = lh5.LH5Store()
+ out_df = pd.DataFrame(columns=params)
if isinstance(files, dict):
+ keys = lh5.ls(
+ files[list(files)[0]][0],
+ lh5_path if lh5_path[-1] == "/" else lh5_path + "/",
+ )
+ keys = [key.split("/")[-1] for key in keys]
+ if list(files)[0] in cal_dict:
+ params = get_params(keys + list(cal_dict[list(files)[0]].keys()), params)
+ else:
+ params = get_params(keys + list(cal_dict.keys()), params)
+
df = []
all_files = []
masks = np.array([], dtype=bool)
for tstamp, tfiles in files.items():
- table = sto.read_object(lh5_path, tfiles)[0]
+ table = sto.read(lh5_path, tfiles)[0]
+
+ file_df = pd.DataFrame(columns=params)
if tstamp in cal_dict:
- file_df = table.eval(cal_dict[tstamp]).get_dataframe()
+ cal_dict_ts = cal_dict[tstamp]
else:
- file_df = table.eval(cal_dict).get_dataframe()
+ cal_dict_ts = cal_dict
+
+ for outname, info in cal_dict_ts.items():
+ outcol = table.eval(info["expression"], info.get("parameters", None))
+ table.add_column(outname, outcol)
+
+ for param in params:
+ file_df[param] = table[param]
+
file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object)
- params.append("run_timestamp")
- if threshold is not None:
- mask = file_df[cal_energy_param] < threshold
- file_df.drop(np.where(mask)[0], inplace=True)
+ if threshold is not None:
+ mask = file_df[cal_energy_param] > threshold
+ file_df.drop(np.where(~mask)[0], inplace=True)
else:
- mask = np.zeros(len(file_df), dtype=bool)
- masks = np.append(masks, ~mask)
+ mask = np.ones(len(file_df), dtype=bool)
+ masks = np.append(masks, mask)
df.append(file_df)
all_files += tfiles
+ params.append("run_timestamp")
df = pd.concat(df)
elif isinstance(files, list):
- table = sto.read_object(lh5_path, files)[0]
- df = table.eval(cal_dict).get_dataframe()
+ keys = lh5.ls(files[0], lh5_path if lh5_path[-1] == "/" else lh5_path + "/")
+ keys = [key.split("/")[-1] for key in keys]
+ params = get_params(keys + list(cal_dict.keys()), params)
+
+ table = sto.read(lh5_path, files)[0]
+ df = pd.DataFrame(columns=params)
+ for outname, info in cal_dict.items():
+ outcol = table.eval(info["expression"], info.get("parameters", None))
+ table.add_column(outname, outcol)
+ for param in params:
+ df[param] = table[param]
if threshold is not None:
masks = df[cal_energy_param] > threshold
df.drop(np.where(~masks)[0], inplace=True)
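A hypothetical cal_dict entry in the shape load_data() consumes above; the column name "LQ_Ecorr" and the value of "a" are placeholders. Each output column is produced with Table.eval and attached with add_column, exactly as in the loop shown in the diff:

    # illustrative only; names and values are made up
    cal_dict = {
        "LQ_Timecorr": {
            "expression": "LQ_Ecorr/a",
            "parameters": {"a": 0.98},
        }
    }
    # inside load_data this becomes, per entry:
    #   outcol = table.eval(info["expression"], info.get("parameters", None))
    #   table.add_column(outname, outcol)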
@@ -86,20 +116,10 @@ def load_data(
masks = np.ones(len(df), dtype=bool)
all_files = files
- if lh5_path[-1] != "/":
- lh5_path += "/"
- keys = lh5.ls(all_files[0], lh5_path)
- keys = [key.split("/")[-1] for key in keys]
- params = get_params(keys + list(df.keys()), params)
-
for col in list(df.keys()):
if col not in params:
df.drop(col, inplace=True, axis=1)
- param_dict = {}
- for param in params:
- if param not in df:
- df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks]
log.debug(f"data loaded")
if return_selection_mask:
return df, masks
@@ -122,14 +142,23 @@ def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold):
mask = np.append(mask, file_mask)
ids = np.where(mask)[0]
else:
- data = lh5.load_dfs(tcm_file, ["array_id", "array_idx"], "hardware_tcm_1")
- cum_length = lh5.load_nda(tcm_file, ["cumulative_length"], "hardware_tcm_1")[
- "cumulative_length"
- ]
- cum_length = np.append(np.array([0]), cum_length)
- n_channels = np.diff(cum_length)
- evt_numbers = np.repeat(np.arange(0, len(cum_length) - 1), np.diff(cum_length))
- evt_mult = np.repeat(np.diff(cum_length), np.diff(cum_length))
+ data = pd.DataFrame(
+ {
+ "array_id": sto.read("hardware_tcm_1/array_id", tcm_file)[0].view_as(
+ "np"
+ ),
+ "array_idx": sto.read("hardware_tcm_1/array_idx", tcm_file)[0].view_as(
+ "np"
+ ),
+ }
+ )
+ cumulength = sto.read("hardware_tcm_1/cumulative_length", tcm_file)[0].view_as(
+ "np"
+ )
+ cumulength = np.append(np.array([0]), cumulength)
+ n_channels = np.diff(cumulength)
+ evt_numbers = np.repeat(np.arange(0, len(cumulength) - 1), np.diff(cumulength))
+ evt_mult = np.repeat(np.diff(cumulength), np.diff(cumulength))
data["evt_number"] = evt_numbers
data["evt_mult"] = evt_mult
high_mult_events = np.where(n_channels > multiplicity_threshold)[0]
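The cumulative_length decoding above is easiest to see on a toy example: consecutive differences give each event's hit multiplicity, which is then broadcast back onto the per-hit rows.

    import numpy as np

    # hypothetical TCM cumulative_length for three events with 2, 1 and 3 hits
    cumulative_length = np.array([2, 3, 6])
    edges = np.append([0], cumulative_length)

    n_channels = np.diff(edges)                                     # [2 1 3]
    evt_numbers = np.repeat(np.arange(len(edges) - 1), n_channels)  # [0 0 1 2 2 2]
    evt_mult = np.repeat(n_channels, n_channels)                    # [2 2 1 3 3 3]
    print(n_channels, evt_numbers, evt_mult)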
diff --git a/src/pygama/skm/__init__.py b/src/pygama/skm/__init__.py
new file mode 100644
index 000000000..7b9ae88d2
--- /dev/null
+++ b/src/pygama/skm/__init__.py
@@ -0,0 +1,7 @@
+"""
+Utilities for building the `skm` tier: skimmed data from lower tiers.
+"""
+
+from .build_skm import build_skm
+
+__all__ = ["build_skm"]
diff --git a/src/pygama/skm/build_skm.py b/src/pygama/skm/build_skm.py
new file mode 100644
index 000000000..83c601c3a
--- /dev/null
+++ b/src/pygama/skm/build_skm.py
@@ -0,0 +1,245 @@
+"""
+This module implements routines to build the `skm` tier, consisting of skimmed
+data from lower tiers.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+
+import awkward as ak
+import numpy as np
+from lgdo import Array, Table, lh5
+from lgdo.lh5 import LH5Store
+
+from pygama.evt import utils
+
+log = logging.getLogger(__name__)
+
+
+def build_skm(
+ f_evt: str,
+ f_hit: str,
+ f_dsp: str,
+ f_tcm: str,
+ skm_conf: dict | str,
+ f_skm: str | None = None,
+ wo_mode: str = "w",
+ skm_group: str = "skm",
+ evt_group: str = "evt",
+ tcm_group: str = "hardware_tcm_1",
+ dsp_group: str = "dsp",
+ hit_group: str = "hit",
+ tcm_id_table_pattern: str = "ch{}",
+) -> None | Table:
+ """Builds a skimmed file from a (set) of `evt/hit/dsp` tier file(s).
+
+ Parameters
+ ----------
+ f_evt
+ path of `evt` file.
+ f_hit
+ path of `hit` file.
+ f_dsp
+ path of `dsp` file.
+ f_tcm
+ path of `tcm` file.
+ skm_conf
+ name of configuration file or dictionary defining `skm` fields.
+
+        - ``multiplicity`` defines the maximum row length up to which
+          :class:`.VectorOfVectors` fields are kept.
+        - ``postfixes`` is a list of column-name postfixes; it must have
+          length ``multiplicity``. If not given, the numbers 0 to
+          ``multiplicity - 1`` are used.
+        - ``operations`` defines the fields forwarded from lower tiers,
+          clipped/padded with ``missing_value`` where needed. If the
+          forwarded field does not come from the `evt` tier, a ``tcm_idx``
+          field must be given that specifies which value to pick across
+          channels.
+
+ For example:
+
+ .. code-block:: json
+
+ {
+ "multiplicity": 2,
+ "postfixes":["", "aux"],
+ "operations": {
+ "timestamp":{
+ "forward_field": "evt.timestamp"
+ },
+ "multiplicity":{
+ "forward_field": "evt.multiplicity"
+ },
+ "energy":{
+ "forward_field": "hit.cuspEmax_ctc_cal",
+ "missing_value": "np.nan",
+ "tcm_idx": "evt.energy_idx"
+ },
+ "energy_id":{
+ "forward_field": "tcm.array_id",
+ "missing_value": 0,
+ "tcm_idx": "evt.energy_idx"
+ }
+ }
+ }
+ f_skm
+        name of the `skm` output file. If ``None``, the output
+        :class:`.Table` is returned instead of being written to disk.
+
+ wo_mode
+ writing mode.
+
+        - ``write_safe`` or ``w``: only proceed with writing if the file does
+          not already exist.
+        - ``append`` or ``a``: append to the file.
+        - ``overwrite`` or ``o``: replace the existing file.
+
+ skm_group
+ `skm` LH5 root group name.
+ evt_group
+ `evt` LH5 root group name.
+ hit_group
+ `hit` LH5 root group name.
+ dsp_group
+ `dsp` LH5 root group name.
+ tcm_group
+ `tcm` LH5 root group name.
+ tcm_id_table_pattern
+        pattern used to format `tcm` id values into table names in higher
+        tiers. Must contain exactly one placeholder, which is filled with the
+        `tcm` id.
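+
+    Examples
+    --------
+    A minimal sketch of a call returning the skimmed table in memory (the
+    file paths below are hypothetical placeholders):
+
+    .. code-block:: python
+
+        from pygama.skm import build_skm
+
+        tbl = build_skm(
+            f_evt="file-tier_evt.lh5",
+            f_hit="file-tier_hit.lh5",
+            f_dsp="file-tier_dsp.lh5",
+            f_tcm="file-tier_tcm.lh5",
+            skm_conf="skm-config.json",
+        )  # f_skm is None, so the skimmed Table is returned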
+ """
+ f_dict = {evt_group: f_evt, hit_group: f_hit, dsp_group: f_dsp, tcm_group: f_tcm}
+ log = logging.getLogger(__name__)
+ log.debug(f"I am skimming {len(f_evt) if isinstance(f_evt,list) else 1} files")
+
+ tbl_cfg = skm_conf
+ if not isinstance(tbl_cfg, (str, dict)):
+        raise TypeError("skm_conf must be a dict or a path to a JSON file")
+ if isinstance(tbl_cfg, str):
+ with open(tbl_cfg) as f:
+ tbl_cfg = json.load(f)
+
+ # Check if multiplicity is given
+ if "multiplicity" not in tbl_cfg.keys():
+ raise ValueError("multiplicity field missing")
+
+ multi = int(tbl_cfg["multiplicity"])
+ store = LH5Store()
+ table = Table()
+ if "operations" in tbl_cfg.keys():
+ for op in tbl_cfg["operations"].keys():
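+            # resolve the configured missing_value; the strings "np.nan",
+            # "np.inf" and "-np.inf" are accepted as shortcuts for the
+            # corresponding floats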
+ miss_val = np.nan
+ if "missing_value" in tbl_cfg["operations"][op].keys():
+ miss_val = tbl_cfg["operations"][op]["missing_value"]
+ if isinstance(miss_val, str) and (
+ miss_val in ["np.nan", "np.inf", "-np.inf"]
+ ):
+ miss_val = eval(miss_val)
+
+ fw_fld = tbl_cfg["operations"][op]["forward_field"]
+
+ # load object if from evt tier
+ if evt_group in fw_fld.replace(".", "/"):
+ obj = store.read(
+ f"/{fw_fld.replace('.','/')}", f_dict[fw_fld.split(".", 1)[0]]
+ )[0].view_as("ak")
+
+ # else collect data from lower tier via tcm_idx
+ else:
+ if "tcm_idx" not in tbl_cfg["operations"][op].keys():
+ raise ValueError(
+ f"{op} is an sub evt level operation. tcm_idx field must be specified"
+ )
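+                # tcm_idx gives, per event, the rows of the flat TCM arrays
+                # to pick from; it is clipped to the configured multiplicity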
+ tcm_idx_fld = tbl_cfg["operations"][op]["tcm_idx"]
+ tcm_idx = store.read(
+ f"/{tcm_idx_fld.replace('.','/')}",
+ f_dict[tcm_idx_fld.split(".")[0]],
+ )[0].view_as("ak")[:, :multi]
+
+ obj = ak.Array([[] for x in range(len(tcm_idx))])
+
+ # load TCM data to define an event
+ ids = store.read(f"/{tcm_group}/array_id", f_tcm)[0].view_as("ak")
+ ids = ak.unflatten(ids[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1))
+
+ idx = store.read(f"/{tcm_group}/array_idx", f_tcm)[0].view_as("ak")
+ idx = ak.unflatten(idx[ak.flatten(tcm_idx)], ak.count(tcm_idx, axis=-1))
+
+ if "tcm.array_id" == tbl_cfg["operations"][op]["forward_field"]:
+ obj = ids
+ elif "tcm.array_idx" == tbl_cfg["operations"][op]["forward_field"]:
+ obj = idx
+
+ else:
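+                    # any other lower-tier field: loop over the channels
+                    # present in the selection, read their values at the TCM
+                    # indices and stitch them back together per event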
+ chns = np.unique(
+ ak.to_numpy(ak.flatten(ids), allow_missing=False)
+ ).astype(int)
+
+ # Get the data
+ for ch in chns:
+ ch_idx = idx[ids == ch]
+ ct_idx = ak.count(ch_idx, axis=-1)
+ fl_idx = ak.to_numpy(ak.flatten(ch_idx), allow_missing=False)
+
+ if (
+ f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld.replace('.','/')}"
+ not in lh5.ls(
+ f_dict[[key for key in f_dict if key in fw_fld][0]],
+ f"ch{ch}/{fw_fld.rsplit('.',1)[0]}/",
+ )
+ ):
+ och = Array(nda=np.full(len(fl_idx), miss_val))
+ else:
+ och, _ = store.read(
+ f"{utils.get_table_name_by_pattern(tcm_id_table_pattern,ch)}/{fw_fld.replace('.','/')}",
+ f_dict[[key for key in f_dict if key in fw_fld][0]],
+ idx=fl_idx,
+ )
+ if not isinstance(och, Array):
+ raise ValueError(
+ f"{type(och)} not supported. Forward only Array fields"
+ )
+ och = och.view_as("ak")
+ och = ak.unflatten(och, ct_idx)
+ obj = ak.concatenate((obj, och), axis=-1)
+
+ # Pad, clip and numpyfy
+ if obj.ndim > 1:
+ obj = ak.pad_none(obj, multi, clip=True)
+ obj = ak.to_numpy(ak.fill_none(obj, miss_val))
+
+ if obj.ndim > 1:
+ if "postfixes" in tbl_cfg.keys():
+ nms = [f"{op}{x}" for x in tbl_cfg["postfixes"]]
+ else:
+ nms = [f"{op}_{x}" for x in range(multi)]
+
+ for i in range(len(nms)):
+ # add attribute if present
+ ob = Array(nda=obj[:, i])
+ if "lgdo_attrs" in tbl_cfg["operations"][op].keys():
+ ob.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"]
+ table.add_field(nms[i], ob, True)
+ else:
+ obj = Array(nda=obj)
+ if "lgdo_attrs" in tbl_cfg["operations"][op].keys():
+ obj.attrs |= tbl_cfg["operations"][op]["lgdo_attrs"]
+ table.add_field(op, obj, True)
+
+ if not f_skm:
+ return table
+
+    # finally, write the skimmed table out to disk
+ if wo_mode not in ["w", "write_safe", "o", "overwrite", "a", "append"]:
+ raise ValueError(f"wo_mode {wo_mode} not valid.")
+
+ log.debug("saving skm file")
+ if (wo_mode in ["w", "write_safe"]) and os.path.exists(f_skm):
+ raise FileExistsError(f"Write_safe mode: {f_skm} exists.")
+
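+    # lgdo uses "of" for its overwrite-file mode; map "o"/"overwrite" to it
+    # and pass the remaining modes through unchanged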
+ wo = wo_mode if wo_mode not in ["o", "overwrite"] else "of"
+ store.write(obj=table, name=f"/{skm_group}/", lh5_file=f_skm, wo_mode=wo)
diff --git a/tests/configs/icpc-dsp-config.json b/tests/configs/icpc-dsp-config.json
index 94c97dc57..28af29239 100644
--- a/tests/configs/icpc-dsp-config.json
+++ b/tests/configs/icpc-dsp-config.json
@@ -73,11 +73,25 @@
"args": ["wf_pz[1500:]", "pz_mean", "pz_std", "pz_slope", "pz_intercept"],
"unit": ["ADC", "ADC", "ADC", "ADC"]
},
- "wf_t0_filter": {
+ "t0_kernel": {
"function": "t0_filter",
"module": "dspeed.processors",
- "args": ["wf_pz", "wf_t0_filter(len(wf_pz), 'f', grid=wf_pz.grid)"],
- "init_args": ["128*ns/wf_pz.period", "2*us/wf_pz.period"],
+ "args": [
+ "128*ns/wf_pz.period",
+ "2*us/wf_pz.period",
+ "t0_kernel(round((128*ns+2*us)/wf_pz.period), 'f')"
+ ],
+ "unit": "ADC"
+ },
+ "wf_t0_filter": {
+ "function": "convolve_wf",
+ "module": "dspeed.processors",
+ "args": [
+ "wf_pz",
+ "t0_kernel",
+ "'s'",
+ "wf_t0_filter(len(wf_pz), 'f', grid=wf_pz.grid)"
+ ],
"unit": "ADC"
},
"wf_atrap": {
@@ -137,7 +151,7 @@
"module": "dspeed.processors",
"args": [
"wf_etrap",
- "tp_0_est+db.etrap.rise+db.etrap.flat*db.etrap.sample",
+ "round(tp_0_est+db.etrap.rise+db.etrap.flat*db.etrap.sample, wf_etrap.grid)",
"'l'",
"trapEftp"
],
@@ -148,23 +162,33 @@
"db.etrap.sample": "0.8"
}
},
- "wf_cusp": {
+ "cusp_kernel": {
"function": "cusp_filter",
"module": "dspeed.processors",
- "args": ["wf_blsub", "wf_cusp(101, 'f')"],
- "init_args": [
- "len(wf_blsub)-100",
+ "args": [
"db.cusp.sigma/wf_blsub.period",
"round(db.cusp.flat/wf_blsub.period)",
- "db.pz.tau"
+ "db.pz.tau/wf_blsub.period",
+ "cusp_kernel(round(len(wf_blsub)-(33.6*us/wf_blsub.period)-(4.8*us/wf_blsub.period)), 'f')"
],
"defaults": {
"db.cusp.sigma": "20*us",
"db.cusp.flat": "3*us",
- "db.pz.tau": "27460.5"
+ "db.pz.tau": "450*us"
},
"unit": "ADC"
},
+ "wf_cusp": {
+ "function": "fft_convolve_wf",
+ "module": "dspeed.processors",
+ "args": [
+ "wf_blsub[:round(len(wf_blsub)-(33.6*us/wf_blsub.period))]",
+ "cusp_kernel",
+ "'v'",
+ "wf_cusp(round((4.8*us/wf_blsub.period)+1), 'f')"
+ ],
+ "unit": "ADC"
+ },
"cuspEmax": {
"function": "amax",
"module": "numpy",
@@ -179,23 +203,33 @@
"unit": "ADC",
"defaults": { "db.cusp.sample": "50" }
},
- "wf_zac": {
+ "zac_kernel": {
"function": "zac_filter",
"module": "dspeed.processors",
- "args": ["wf_blsub", "wf_zac(101, 'f')"],
- "init_args": [
- "len(wf_blsub)-100",
+ "args": [
"db.zac.sigma/wf_blsub.period",
"round(db.zac.flat/wf_blsub.period)",
- "db.pz.tau"
+ "db.pz.tau/wf_blsub.period",
+ "zac_kernel(round(len(wf_blsub)-(33.6*us/wf_blsub.period)-(4.8*us/wf_blsub.period)), 'f')"
],
"defaults": {
"db.zac.sigma": "20*us",
"db.zac.flat": "3*us",
- "db.pz.tau": "27460.5"
+ "db.pz.tau": "450*us"
},
"unit": "ADC"
},
+ "wf_zac": {
+ "function": "fft_convolve_wf",
+ "module": "dspeed.processors",
+ "args": [
+ "wf_blsub[:round(len(wf_blsub)-(33.6*us/wf_blsub.period))]",
+ "zac_kernel",
+ "'v'",
+ "wf_zac(round((4.8*us/wf_blsub.period)+1), 'f')"
+ ],
+ "unit": "ADC"
+ },
"zacEmax": {
"function": "amax",
"module": "numpy",
diff --git a/tests/configs/sipm-dplms-config.json b/tests/configs/sipm-dplms-config.json
index 8040681fc..dd69bac0f 100644
--- a/tests/configs/sipm-dplms-config.json
+++ b/tests/configs/sipm-dplms-config.json
@@ -10,26 +10,26 @@
"processors": {
"wf_gaus": {
"function": "gaussian_filter1d",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.gaussian_filter1d",
"args": ["waveform", "wf_gaus(len(waveform))"],
"init_args": ["1", "4.0"],
"unit": "ADC"
},
"curr": {
"function": "avg_current",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.moving_windows",
"args": ["wf_gaus", 5, "curr(len(wf_gaus)-5)"],
"unit": "ADC"
},
"hist_weights , hist_borders": {
"function": "histogram",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.histogram",
"args": ["curr", "hist_weights(100)", "hist_borders(101)"],
"unit": ["none", "ADC"]
},
"fwhm, idx_out_c, max_out": {
"function": "histogram_stats",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.histogram",
"args": [
"hist_weights",
"hist_borders",
@@ -42,7 +42,7 @@
},
"vt_max_candidate_out, vt_min_out, n_max_out, n_min_out": {
"function": "get_multi_local_extrema",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.get_multi_local_extrema",
"args": [
"curr",
5,
@@ -59,7 +59,7 @@
},
"trigger_pos, no_out": {
"function": "peak_snr_threshold",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.peak_snr_threshold",
"args": [
"curr",
"vt_max_candidate_out",
@@ -72,7 +72,7 @@
},
"energies": {
"function": "multi_a_filter",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.multi_a_filter",
"args": ["curr", "trigger_pos", "energies"],
"unit": ["ADC"]
},
@@ -94,30 +94,41 @@
"args": ["waveform", 1, "wf_diff(len(waveform)-1)"],
"unit": "ADC"
},
- "wf_dplms": {
+ "dplms_kernel": {
"function": "dplms_filter",
"module": "dspeed.processors",
- "args": ["wf_diff", "wf_dplms(len(wf_diff)-49, 'f')"],
- "unit": "ADC",
- "init_args": [
+ "args": [
"db.dplms.noise_matrix",
"db.dplms.reference",
- "50",
"0.01",
"1",
"0",
- "0"
- ]
+ "0",
+ "dplms_kernel(50, 'f')"
+ ],
+ "unit": "ADC"
+ },
+ "wf_dplms": {
+ "description": "convolve optimised cusp filter",
+ "function": "convolve_wf",
+ "module": "dspeed.processors",
+ "args": [
+ "wf_diff",
+ "dplms_kernel",
+ "'s'",
+ "wf_dplms(len(wf_diff)-49, 'f')"
+ ],
+ "unit": "ADC"
},
"h_weights , h_borders": {
"function": "histogram",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.histogram",
"args": ["wf_dplms", "h_weights(100)", "h_borders(101)"],
"unit": ["none", "ADC"]
},
"fwhm_d, idx_out_d, max_out_d": {
"function": "histogram_stats",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.histogram",
"args": [
"h_weights",
"h_borders",
@@ -130,7 +141,7 @@
},
"vt_max_candidate_out_d, vt_min_out_d, n_max_out_d, n_min_out_d": {
"function": "get_multi_local_extrema",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.get_multi_local_extrema",
"args": [
"wf_dplms",
10,
@@ -145,7 +156,7 @@
},
"trigger_pos_dplms, no_out_d": {
"function": "peak_snr_threshold",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.peak_snr_threshold",
"args": [
"wf_dplms",
"vt_max_candidate_out_d",
@@ -158,7 +169,7 @@
},
"energies_dplms": {
"function": "multi_a_filter",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.multi_a_filter",
"args": ["wf_dplms", "trigger_pos_dplms", "energies_dplms"],
"unit": ["ADC"]
}
diff --git a/tests/configs/sipm-dsp-config.json b/tests/configs/sipm-dsp-config.json
index 5d9f3e6ca..bb7878a5d 100644
--- a/tests/configs/sipm-dsp-config.json
+++ b/tests/configs/sipm-dsp-config.json
@@ -3,26 +3,26 @@
"processors": {
"wf_gaus": {
"function": "gaussian_filter1d",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.gaussian_filter1d",
"args": ["waveform", "wf_gaus(len(waveform))"],
"init_args": ["1", "4.0"],
"unit": "ADC"
},
"curr": {
"function": "avg_current",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.moving_windows",
"args": ["wf_gaus", 5, "curr(len(wf_gaus)-5)"],
"unit": "ADC"
},
"hist_weights , hist_borders": {
"function": "histogram",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.histogram",
"args": ["curr", "hist_weights(100)", "hist_borders(101)"],
"unit": ["none", "ADC"]
},
"fwhm, idx_out_c, max_out": {
"function": "histogram_stats",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.histogram",
"args": [
"hist_weights",
"hist_borders",
@@ -35,7 +35,7 @@
},
"vt_max_candidate_out, vt_min_out, n_max_out, n_min_out": {
"function": "get_multi_local_extrema",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.get_multi_local_extrema",
"args": [
"curr",
5,
@@ -52,7 +52,7 @@
},
"trigger_pos, no_out": {
"function": "peak_snr_threshold",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.peak_snr_threshold",
"args": [
"curr",
"vt_max_candidate_out",
@@ -65,7 +65,7 @@
},
"energies": {
"function": "multi_a_filter",
- "module": "dspeed.processors",
+ "module": "dspeed.processors.multi_a_filter",
"args": ["curr", "trigger_pos", "energies"],
"unit": ["ADC"]
}
diff --git a/tests/evt/configs/basic-evt-config.json b/tests/evt/configs/basic-evt-config.json
new file mode 100644
index 000000000..3a8c62753
--- /dev/null
+++ b/tests/evt/configs/basic-evt-config.json
@@ -0,0 +1,90 @@
+{
+ "channels": {
+ "geds_on": ["ch1084803", "ch1084804", "ch1121600"]
+ },
+ "outputs": [
+ "multiplicity",
+ "energy",
+ "energy_id",
+ "energy_idx",
+ "energy_any_above1MeV",
+ "energy_all_above1MeV",
+ "energy_aux",
+ "energy_sum",
+ "is_usable_aoe",
+ "aoe",
+ "is_aoe_rejected"
+ ],
+ "operations": {
+ "multiplicity": {
+ "channels": "geds_on",
+ "aggregation_mode": "sum",
+ "expression": "hit.cuspEmax_ctc_cal > a",
+ "parameters": { "a": 25 },
+ "initial": 0,
+ "lgdo_attrs": { "statement": "0bb decay is real" }
+ },
+ "energy": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal",
+ "initial": "np.nan"
+ },
+ "energy_id": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.array_id",
+ "initial": 0
+ },
+ "energy_idx": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.index",
+ "initial": 999999999999
+ },
+ "energy_any_above1MeV": {
+ "channels": "geds_on",
+ "aggregation_mode": "any",
+ "expression": "hit.cuspEmax_ctc_cal>1000",
+ "initial": false
+ },
+ "energy_all_above1MeV": {
+ "channels": "geds_on",
+ "aggregation_mode": "all",
+ "expression": "hit.cuspEmax_ctc_cal>1000",
+ "initial": false
+ },
+ "energy_aux": {
+ "channels": "geds_on",
+ "aggregation_mode": "last_at:dsp.tp_0_est",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal",
+ "initial": "np.nan"
+ },
+ "energy_sum": {
+ "channels": "geds_on",
+ "aggregation_mode": "sum",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal",
+ "initial": 0.0
+ },
+ "is_usable_aoe": {
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "True",
+ "initial": false
+ },
+ "aoe": {
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "hit.AoE_Classifier",
+ "initial": "np.nan"
+ },
+ "is_aoe_rejected": {
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "~(hit.AoE_Double_Sided_Cut)",
+ "initial": false
+ }
+ }
+}
diff --git a/tests/evt/configs/module-test-evt-config.json b/tests/evt/configs/module-test-evt-config.json
new file mode 100644
index 000000000..0daa94658
--- /dev/null
+++ b/tests/evt/configs/module-test-evt-config.json
@@ -0,0 +1,72 @@
+{
+ "channels": {
+ "spms_on": ["ch1057600", "ch1059201", "ch1062405"],
+ "geds_on": ["ch1084803", "ch1084804", "ch1121600"]
+ },
+ "outputs": [
+ "energy_first",
+ "energy_first_id",
+ "t0",
+ "lar_energy",
+ "lar_multiplicity",
+ "is_lar_rejected",
+ "lar_classifier",
+ "lar_energy_dplms",
+ "lar_multiplicity_dplms",
+ "lar_time_shift"
+ ],
+ "operations": {
+ "energy_first": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal",
+ "initial": "np.nan"
+ },
+ "energy_first_id": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.array_id",
+ "initial": 0
+ },
+ "t0": {
+ "aggregation_mode": "keep_at_ch:evt.energy_first_id",
+ "expression": "dsp.tp_0_est",
+ "initial": 0.0
+ },
+ "lar_energy": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": "pygama.evt.modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_multiplicity": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_majority(0.5,evt.t0,48000,1000,5000)"
+ },
+ "is_lar_rejected": {
+ "expression": "(evt.lar_energy >4) | (evt.lar_multiplicity > 4) "
+ },
+ "lar_classifier": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0,50)"
+ },
+ "lar_energy_dplms": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_energy_dplms(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_multiplicity_dplms": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_majority_dplms(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_time_shift": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)"
+ }
+ }
+}
diff --git a/tests/evt/configs/module-test-t0-vov-evt-config.json b/tests/evt/configs/module-test-t0-vov-evt-config.json
new file mode 100644
index 000000000..cda042337
--- /dev/null
+++ b/tests/evt/configs/module-test-t0-vov-evt-config.json
@@ -0,0 +1,82 @@
+{
+ "channels": {
+ "spms_on": ["ch1057600", "ch1059201", "ch1062405"],
+ "geds_on": ["ch1084803", "ch1084804", "ch1121600"]
+ },
+ "outputs": [
+ "energy",
+ "energy_id",
+ "t0",
+ "lar_energy",
+ "lar_multiplicity",
+ "is_lar_rejected",
+ "lar_classifier",
+ "lar_energy_dplms",
+ "lar_multiplicity_dplms",
+ "lar_time_shift",
+ "lar_tcm_index",
+ "lar_pulse_index"
+ ],
+ "operations": {
+ "energy": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal"
+ },
+ "energy_id": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.array_id"
+ },
+ "t0": {
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "dsp.tp_0_est",
+ "initial": 0.0
+ },
+ "lar_energy": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_energy(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_multiplicity": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_majority(0.5,evt.t0,48000,1000,5000)"
+ },
+ "is_lar_rejected": {
+ "expression": "(evt.lar_energy >4) | (evt.lar_multiplicity > 4) "
+ },
+ "lar_classifier": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_etc(0.5,evt.t0,48000,100,6000,80,1,0,50)"
+ },
+ "lar_energy_dplms": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_energy_dplms(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_multiplicity_dplms": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_majority_dplms(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_time_shift": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_time_shift(0.5,evt.t0,48000,1000,5000)"
+ },
+ "lar_tcm_index": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,1)"
+ },
+ "lar_pulse_index": {
+ "channels": "spms_on",
+ "aggregation_mode": "function",
+ "expression": ".modules.spm.get_masked_tcm_idx(0.5,evt.t0,48000,1000,5000,0)"
+ }
+ }
+}
diff --git a/tests/evt/configs/query-test-evt-config.json b/tests/evt/configs/query-test-evt-config.json
new file mode 100644
index 000000000..901d2d6c1
--- /dev/null
+++ b/tests/evt/configs/query-test-evt-config.json
@@ -0,0 +1,102 @@
+{
+ "channels": {
+ "geds_on": ["ch1084803", "ch1084804", "ch1121600"]
+ },
+ "outputs": [
+ "multiplicity",
+ "test_sum",
+ "test_first",
+ "test_first2",
+ "test_last",
+ "test_last2",
+ "test_any",
+ "test_any2",
+ "test_all",
+ "test_all2",
+ "test_vov",
+ "test_vov2"
+ ],
+ "operations": {
+ "multiplicity": {
+ "channels": "geds_on",
+ "aggregation_mode": "sum",
+ "expression": "hit.cuspEmax_ctc_cal > a",
+ "parameters": { "a": 25 },
+ "initial": 0
+ },
+ "test_sum": {
+ "channels": "geds_on",
+ "aggregation_mode": "sum",
+ "query": "evt.multiplicity == 1",
+ "expression": "True",
+ "initial": false
+ },
+ "test_first": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "query": "evt.multiplicity == 1",
+ "expression": "True",
+ "initial": false
+ },
+ "test_first2": {
+ "channels": "geds_on",
+ "aggregation_mode": "first_at:dsp.tp_0_est",
+ "expression": "True",
+ "initial": false
+ },
+ "test_last": {
+ "channels": "geds_on",
+ "aggregation_mode": "last_at:dsp.tp_0_est",
+ "query": "evt.multiplicity == 1",
+ "expression": "True",
+ "initial": false
+ },
+ "test_last2": {
+ "channels": "geds_on",
+ "aggregation_mode": "last_at:dsp.tp_0_est",
+ "expression": "True",
+ "initial": false
+ },
+ "test_any": {
+ "channels": "geds_on",
+ "aggregation_mode": "any",
+ "query": "evt.multiplicity == 1",
+ "expression": "True",
+ "initial": false
+ },
+ "test_any2": {
+ "channels": "geds_on",
+ "aggregation_mode": "any",
+ "query": "hit.cuspEmax_ctc_cal >25",
+ "expression": "True",
+ "initial": false
+ },
+ "test_all": {
+ "channels": "geds_on",
+ "aggregation_mode": "all",
+ "query": "evt.multiplicity == 1",
+ "expression": "True",
+ "initial": false
+ },
+ "test_all2": {
+ "channels": "geds_on",
+ "aggregation_mode": "all",
+ "query": "hit.cuspEmax_ctc_cal >25",
+ "expression": "True",
+ "initial": false
+ },
+ "test_vov": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "evt.multiplicity == 1",
+ "expression": "True",
+ "initial": false
+ },
+ "test_vov2": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "expression": "True",
+ "initial": false
+ }
+ }
+}
diff --git a/tests/evt/configs/vov-test-evt-config.json b/tests/evt/configs/vov-test-evt-config.json
new file mode 100644
index 000000000..31334101e
--- /dev/null
+++ b/tests/evt/configs/vov-test-evt-config.json
@@ -0,0 +1,85 @@
+{
+ "channels": {
+ "geds_on": ["ch1084803", "ch1084804", "ch1121600"],
+ "ts_master": "ch1084803"
+ },
+ "outputs": [
+ "timestamp",
+ "energy",
+ "energy_sum",
+ "energy_id",
+ "energy_idx",
+ "aoe",
+ "aoe_idx",
+ "multiplicity",
+ "is_saturated",
+ "energy_times_aoe",
+ "energy_times_multiplicity",
+ "multiplicity_squared"
+ ],
+ "operations": {
+ "timestamp": {
+ "channels": "ts_master",
+ "aggregation_mode": "sum",
+ "expression": "dsp.timestamp",
+ "initial": 0.0
+ },
+ "energy": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal"
+ },
+ "energy_sum": {
+ "channels": "geds_on",
+ "aggregation_mode": "sum",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "hit.cuspEmax_ctc_cal",
+ "initial": 0.0
+ },
+ "energy_idx": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.index",
+ "sort": "ascend_by:dsp.tp_0_est",
+ "initial": 0
+ },
+ "energy_id": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.array_id",
+ "sort": "ascend_by:dsp.tp_0_est",
+ "initial": 0
+ },
+ "aoe": {
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "hit.AoE_Classifier"
+ },
+ "aoe_idx": {
+ "aggregation_mode": "keep_at_idx:evt.energy_idx",
+ "expression": "hit.AoE_Classifier"
+ },
+ "multiplicity": {
+ "channels": "geds_on",
+ "aggregation_mode": "sum",
+ "expression": "hit.cuspEmax_ctc_cal > a",
+ "parameters": { "a": 25 },
+ "initial": 0
+ },
+ "is_saturated": {
+ "aggregation_mode": "keep_at_ch:evt.energy_id",
+ "expression": "hit.is_saturated"
+ },
+ "energy_times_aoe": {
+ "expression": "evt.energy*evt.aoe"
+ },
+ "energy_times_multiplicity": {
+ "expression": "evt.energy*evt.multiplicity"
+ },
+ "multiplicity_squared": {
+ "expression": "evt.multiplicity*evt.multiplicity"
+ }
+ }
+}
diff --git a/tests/evt/test_build_evt.py b/tests/evt/test_build_evt.py
new file mode 100644
index 000000000..80a40d9a8
--- /dev/null
+++ b/tests/evt/test_build_evt.py
@@ -0,0 +1,315 @@
+import os
+from pathlib import Path
+
+import awkward as ak
+import numpy as np
+import pytest
+from lgdo import Array, VectorOfVectors, lh5
+from lgdo.lh5 import LH5Store
+
+from pygama.evt import build_evt
+
+config_dir = Path(__file__).parent / "configs"
+store = LH5Store()
+
+
+def test_basics(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{config_dir}/basic-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+
+ assert "statement" in store.read("/evt/multiplicity", outfile)[0].getattrs().keys()
+ assert (
+ store.read("/evt/multiplicity", outfile)[0].getattrs()["statement"]
+ == "0bb decay is real"
+ )
+ assert os.path.exists(outfile)
+ assert len(lh5.ls(outfile, "/evt/")) == 11
+ nda = {
+ e: store.read(f"/evt/{e}", outfile)[0].view_as("np")
+ for e in ["energy", "energy_aux", "energy_sum", "multiplicity"]
+ }
+ assert (
+ nda["energy"][nda["multiplicity"] == 1]
+ == nda["energy_aux"][nda["multiplicity"] == 1]
+ ).all()
+ assert (
+ nda["energy"][nda["multiplicity"] == 1]
+ == nda["energy_sum"][nda["multiplicity"] == 1]
+ ).all()
+ assert (
+ nda["energy_aux"][nda["multiplicity"] == 1]
+ == nda["energy_sum"][nda["multiplicity"] == 1]
+ ).all()
+
+ eid = store.read("/evt/energy_id", outfile)[0].view_as("np")
+ eidx = store.read("/evt/energy_idx", outfile)[0].view_as("np")
+ eidx = eidx[eidx != 999999999999]
+
+ ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[
+ 0
+ ].view_as("np")
+ ids = ids[eidx]
+ assert ak.all(ids == eid[eid != 0])
+
+
+def test_lar_module(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{config_dir}/module-test-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+
+ assert os.path.exists(outfile)
+ assert len(lh5.ls(outfile, "/evt/")) == 10
+ nda = {
+ e: store.read(f"/evt/{e}", outfile)[0].view_as("np")
+ for e in ["lar_multiplicity", "lar_multiplicity_dplms", "t0", "lar_time_shift"]
+ }
+ assert np.max(nda["lar_multiplicity"]) <= 3
+ assert np.max(nda["lar_multiplicity_dplms"]) <= 3
+ assert ((nda["lar_time_shift"] + nda["t0"]) >= 0).all()
+
+
+def test_lar_t0_vov_module(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{config_dir}/module-test-t0-vov-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+
+ assert os.path.exists(outfile)
+ assert len(lh5.ls(outfile, "/evt/")) == 12
+ nda = {
+ e: store.read(f"/evt/{e}", outfile)[0].view_as("np")
+ for e in ["lar_multiplicity", "lar_multiplicity_dplms", "lar_time_shift"]
+ }
+ assert np.max(nda["lar_multiplicity"]) <= 3
+ assert np.max(nda["lar_multiplicity_dplms"]) <= 3
+
+ ch_idx = store.read("/evt/lar_tcm_index", outfile)[0].view_as("ak")
+ pls_idx = store.read("/evt/lar_pulse_index", outfile)[0].view_as("ak")
+ assert ak.count(ch_idx) == ak.count(pls_idx)
+ assert ak.all(ak.count(ch_idx, axis=-1) == ak.count(pls_idx, axis=-1))
+
+
+def test_vov(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{config_dir}/vov-test-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+
+ assert os.path.exists(outfile)
+ assert len(lh5.ls(outfile, "/evt/")) == 12
+ vov_ene, _ = store.read("/evt/energy", outfile)
+ vov_aoe, _ = store.read("/evt/aoe", outfile)
+ arr_ac, _ = store.read("/evt/multiplicity", outfile)
+ vov_aoeene, _ = store.read("/evt/energy_times_aoe", outfile)
+ vov_eneac, _ = store.read("/evt/energy_times_multiplicity", outfile)
+ arr_ac2, _ = store.read("/evt/multiplicity_squared", outfile)
+ assert isinstance(vov_ene, VectorOfVectors)
+ assert isinstance(vov_aoe, VectorOfVectors)
+ assert isinstance(arr_ac, Array)
+ assert isinstance(vov_aoeene, VectorOfVectors)
+ assert isinstance(vov_eneac, VectorOfVectors)
+ assert isinstance(arr_ac2, Array)
+ assert (np.diff(vov_ene.cumulative_length.nda, prepend=[0]) == arr_ac.nda).all()
+
+ vov_eid = store.read("/evt/energy_id", outfile)[0].view_as("ak")
+ vov_eidx = store.read("/evt/energy_idx", outfile)[0].view_as("ak")
+ vov_aoe_idx = store.read("/evt/aoe_idx", outfile)[0].view_as("ak")
+
+ ids = store.read("hardware_tcm_1/array_id", lgnd_test_data.get_path(tcm_path))[
+ 0
+ ].view_as("ak")
+ ids = ak.unflatten(ids[ak.flatten(vov_eidx)], ak.count(vov_eidx, axis=-1))
+ assert ak.all(ids == vov_eid)
+
+ arr_ene = store.read("/evt/energy_sum", outfile)[0].view_as("ak")
+ assert ak.all(arr_ene == ak.nansum(vov_ene.view_as("ak"), axis=-1))
+ assert ak.all(vov_aoe.view_as("ak") == vov_aoe_idx)
+
+
+def test_graceful_crashing(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ f_tcm = lgnd_test_data.get_path(tcm_path)
+ f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp"))
+ f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit"))
+ f_config = f"{config_dir}/basic-evt-config.json"
+
+ with pytest.raises(KeyError):
+ build_evt(f_dsp, f_tcm, f_hit, f_config, outfile)
+
+ with pytest.raises(KeyError):
+ build_evt(f_tcm, f_hit, f_dsp, f_config, outfile)
+
+ with pytest.raises(TypeError):
+ build_evt(f_tcm, f_dsp, f_hit, None, outfile)
+
+ conf = {"operations": {}}
+ with pytest.raises(ValueError):
+ build_evt(f_tcm, f_dsp, f_hit, conf, outfile)
+
+ conf = {"channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]}}
+ with pytest.raises(ValueError):
+ build_evt(f_tcm, f_dsp, f_hit, conf, outfile)
+
+ conf = {
+ "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]},
+ "outputs": ["foo"],
+ "operations": {
+ "foo": {
+ "channels": "geds_on",
+ "aggregation_mode": "banana",
+ "expression": "hit.cuspEmax_ctc_cal > a",
+ "parameters": {"a": 25},
+ "initial": 0,
+ }
+ },
+ }
+ with pytest.raises(ValueError):
+ build_evt(f_tcm, f_dsp, f_hit, conf, outfile)
+
+
+def test_query(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{config_dir}/query-test-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+ assert len(lh5.ls(outfile, "/evt/")) == 12
+
+
+def test_vector_sort(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ f_tcm = lgnd_test_data.get_path(tcm_path)
+ f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp"))
+ f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit"))
+
+ conf = {
+ "channels": {"geds_on": ["ch1084803", "ch1084804", "ch1121600"]},
+ "outputs": ["acend_id", "t0_acend", "decend_id", "t0_decend"],
+ "operations": {
+ "acend_id": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.array_id",
+ "sort": "ascend_by:dsp.tp_0_est",
+ },
+ "t0_acend": {
+ "aggregation_mode": "keep_at_ch:evt.acend_id",
+ "expression": "dsp.tp_0_est",
+ },
+ "decend_id": {
+ "channels": "geds_on",
+ "aggregation_mode": "gather",
+ "query": "hit.cuspEmax_ctc_cal>25",
+ "expression": "tcm.array_id",
+ "sort": "descend_by:dsp.tp_0_est",
+ },
+ "t0_decend": {
+ "aggregation_mode": "keep_at_ch:evt.acend_id",
+ "expression": "dsp.tp_0_est",
+ },
+ },
+ }
+ build_evt(f_tcm, f_dsp, f_hit, conf, outfile)
+
+ assert os.path.exists(outfile)
+ assert len(lh5.ls(outfile, "/evt/")) == 4
+ vov_t0, _ = store.read("/evt/t0_acend", outfile)
+ nda_t0 = vov_t0.to_aoesa().view_as("np")
+ assert ((np.diff(nda_t0) >= 0) | (np.isnan(np.diff(nda_t0)))).all()
+ vov_t0, _ = store.read("/evt/t0_decend", outfile)
+ nda_t0 = vov_t0.to_aoesa().view_as("np")
+ assert ((np.diff(nda_t0) <= 0) | (np.isnan(np.diff(nda_t0)))).all()
+
+
+def test_tcm_id_table_pattern(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+ f_tcm = lgnd_test_data.get_path(tcm_path)
+ f_dsp = lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp"))
+ f_hit = lgnd_test_data.get_path(tcm_path.replace("tcm", "hit"))
+ f_config = f"{config_dir}/basic-evt-config.json"
+
+ with pytest.raises(ValueError):
+ build_evt(f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="ch{{}}")
+ with pytest.raises(ValueError):
+ build_evt(f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="ch{}{}")
+ with pytest.raises(NotImplementedError):
+ build_evt(
+ f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="ch{tcm_id}"
+ )
+ with pytest.raises(ValueError):
+ build_evt(
+ f_tcm, f_dsp, f_hit, f_config, outfile, tcm_id_table_pattern="apple{}banana"
+ )
diff --git a/tests/evt/test_build_tcm.py b/tests/evt/test_build_tcm.py
new file mode 100644
index 000000000..c0ba352e0
--- /dev/null
+++ b/tests/evt/test_build_tcm.py
@@ -0,0 +1,72 @@
+import os
+
+import lgdo
+import numpy as np
+from lgdo import lh5
+
+from pygama import evt
+
+
+def test_generate_tcm_cols(lgnd_test_data):
+ f_raw = lgnd_test_data.get_path(
+ "lh5/prod-ref-l200/generated/tier/raw/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_raw.lh5"
+ )
+ tables = lh5.ls(f_raw)
+ store = lh5.LH5Store()
+ coin_data = []
+ for tbl in tables:
+ ts, _ = store.read(f"{tbl}/raw/timestamp", f_raw)
+ coin_data.append(ts)
+
+ tcm_cols = evt.generate_tcm_cols(
+ coin_data, 0, "last", [int(tb[2:]) for tb in tables]
+ )
+ assert isinstance(tcm_cols, dict)
+ for v in tcm_cols.values():
+ assert np.issubdtype(v.dtype, np.integer)
+
+ # fmt: off
+ assert np.array_equal(
+ tcm_cols["cumulative_length"],
+ [
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ ],
+ )
+ assert np.array_equal(
+ tcm_cols["array_id"],
+ [
+ 1084804, 1084803, 1121600, 1084804, 1121600, 1084804, 1121600,
+ 1084804, 1084804, 1084804, 1084803, 1084804, 1084804, 1121600,
+ 1121600, 1084804, 1121600, 1084804, 1121600, 1084803, 1084803,
+ 1121600, 1121600, 1121600, 1084803, 1084803, 1084803, 1084803,
+ 1084803, 1084803,
+ ],
+ )
+ assert np.array_equal(
+ tcm_cols["array_idx"],
+ [
+ 0, 0, 0, 1, 1, 2, 2, 3, 4, 5, 1, 6, 7, 3, 4, 8, 5, 9, 6, 2, 3, 7,
+ 8, 9, 4, 5, 6, 7, 8, 9,
+ ],
+ )
+ # fmt: on
+
+
+def test_build_tcm(lgnd_test_data, tmptestdir):
+ f_raw = lgnd_test_data.get_path(
+ "lh5/prod-ref-l200/generated/tier/raw/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_raw.lh5"
+ )
+ out_file = f"{tmptestdir}/pygama-test-tcm.lh5"
+ evt.build_tcm(
+ [(f_raw, ["ch1084803/raw", "ch1084804/raw", "ch1121600/raw"])],
+ "timestamp",
+ out_file=out_file,
+ out_name="hardware_tcm",
+ wo_mode="of",
+ )
+ assert os.path.exists(out_file)
+ store = lh5.LH5Store()
+ obj, n_rows = store.read("hardware_tcm", out_file)
+ assert isinstance(obj, lgdo.Struct)
+ assert list(obj.keys()) == ["cumulative_length", "array_id", "array_idx"]
diff --git a/tests/flow/test_data_loader.py b/tests/flow/test_data_loader.py
index 3c3b857f9..9c7763c16 100644
--- a/tests/flow/test_data_loader.py
+++ b/tests/flow/test_data_loader.py
@@ -153,13 +153,13 @@ def test_setter_overwrite(test_dl):
test_dl.set_cuts({"hit": "trapEmax > 5000"})
test_dl.set_output(columns=["trapEmax"])
- data = test_dl.load().get_dataframe()
+ data = test_dl.load().view_as("pd")
test_dl.set_files("timestamp == '20230318T012144Z'")
test_dl.set_datastreams([1084803, 1121600], "ch")
test_dl.set_cuts({"hit": "trapEmax > 0"})
- data2 = test_dl.load().get_dataframe()
+ data2 = test_dl.load().view_as("pd")
assert 1084804 not in data2["hit_table"]
assert len(pd.unique(data2["file"])) == 1
diff --git a/tests/hit/configs/aggregations-hit-config.json b/tests/hit/configs/aggregations-hit-config.json
new file mode 100644
index 000000000..57237ce80
--- /dev/null
+++ b/tests/hit/configs/aggregations-hit-config.json
@@ -0,0 +1,28 @@
+{
+ "outputs": ["is_valid_rt", "is_valid_t0", "is_valid_tmax", "aggr1", "aggr2"],
+ "operations": {
+ "is_valid_rt": {
+ "expression": "((tp_90-tp_10)>96) & ((tp_50-tp_10)>=16)",
+ "parameters": {}
+ },
+ "is_valid_t0": {
+ "expression": "(tp_0_est>47000) & (tp_0_est<55000)",
+ "parameters": {}
+ },
+ "is_valid_tmax": {
+ "expression": "(tp_max>47000) & (tp_max<120000)",
+ "parameters": {}
+ }
+ },
+ "aggregations": {
+ "aggr1": {
+ "bit0": "is_valid_rt",
+ "bit1": "is_valid_t0",
+ "bit2": "is_valid_tmax"
+ },
+ "aggr2": {
+ "bit0": "is_valid_t0",
+ "bit1": "is_valid_tmax"
+ }
+ }
+}
diff --git a/tests/hit/configs/basic-hit-config.json b/tests/hit/configs/basic-hit-config.json
index 0cf98137e..1946f162c 100644
--- a/tests/hit/configs/basic-hit-config.json
+++ b/tests/hit/configs/basic-hit-config.json
@@ -1,18 +1,25 @@
{
"outputs": ["calE", "AoE", "A_max"],
"operations": {
- "twice_trap_e_max": {
- "expression": "2 * trapEmax"
+ "AoE": {
+ "expression": "A_max/calE"
},
"calE": {
"expression": "sqrt(a + b * twice_trap_e_max**2)",
"parameters": {
"a": 1.23,
"b": 42.69
+ },
+ "lgdo_attrs": {
+ "units": "keV",
+ "hdf5_settings": {
+ "compression": "gzip",
+ "shuffle": true
+ }
}
},
- "AoE": {
- "expression": "A_max/calE"
+ "twice_trap_e_max": {
+ "expression": "2 * trapEmax"
}
}
}
diff --git a/tests/hit/test_build_hit.py b/tests/hit/test_build_hit.py
index 668d320d4..7c918a229 100644
--- a/tests/hit/test_build_hit.py
+++ b/tests/hit/test_build_hit.py
@@ -2,16 +2,33 @@
import os
from pathlib import Path
-import lgdo.lh5_store as store
import numpy as np
import pytest
-from lgdo import LH5Store, ls
+from lgdo import lh5
from pygama.hit import build_hit
+from pygama.hit.build_hit import _reorder_table_operations
config_dir = Path(__file__).parent / "configs"
+def test_ops_reorder():
+ assert list(_reorder_table_operations({}).keys()) == []
+
+ ops = {
+ "out1": {"expression": "out2 + out3 * outy"},
+ "out2": {"expression": "log(out4)"},
+ "out3": {"expression": "outx + 2"},
+ "out4": {"expression": "outz + out3"},
+ }
+ assert list(_reorder_table_operations(ops).keys()) == [
+ "out3",
+ "out4",
+ "out2",
+ "out1",
+ ]
+
+
def test_basics(dsp_test_file, tmptestdir):
outfile = f"{tmptestdir}/LDQTA_r117_20200110T105115Z_cal_geds_hit.lh5"
@@ -23,7 +40,11 @@ def test_basics(dsp_test_file, tmptestdir):
)
assert os.path.exists(outfile)
- assert ls(outfile, "/geds/") == ["geds/hit"]
+ assert lh5.ls(outfile, "/geds/") == ["geds/hit"]
+
+ store = lh5.LH5Store()
+ tbl, _ = store.read("geds/hit", outfile)
+ assert tbl.calE.attrs == {"datatype": "array<1>{real}", "units": "keV"}
def test_illegal_arguments(dsp_test_file):
@@ -58,7 +79,7 @@ def test_lh5_table_configs(dsp_test_file, tmptestdir):
)
assert os.path.exists(outfile)
- assert ls(outfile, "/geds/") == ["geds/hit"]
+ assert lh5.ls(outfile, "/geds/") == ["geds/hit"]
lh5_tables_config = {
"/geds/dsp": {
@@ -81,7 +102,7 @@ def test_lh5_table_configs(dsp_test_file, tmptestdir):
)
assert os.path.exists(outfile)
- assert ls(outfile, "/geds/") == ["geds/hit"]
+ assert lh5.ls(outfile, "/geds/") == ["geds/hit"]
def test_outputs_specification(dsp_test_file, tmptestdir):
@@ -94,9 +115,52 @@ def test_outputs_specification(dsp_test_file, tmptestdir):
wo_mode="overwrite",
)
- store = LH5Store()
- obj, _ = store.read_object("/geds/hit", outfile)
- assert list(obj.keys()) == ["calE", "AoE", "A_max"]
+ store = lh5.LH5Store()
+ obj, _ = store.read("/geds/hit", outfile)
+ assert sorted(obj.keys()) == ["A_max", "AoE", "calE"]
+
+
+def test_aggregation_outputs(dsp_test_file, tmptestdir):
+ outfile = f"{tmptestdir}/LDQTA_r117_20200110T105115Z_cal_geds_hit.lh5"
+
+ build_hit(
+ dsp_test_file,
+ outfile=outfile,
+ hit_config=f"{config_dir}/aggregations-hit-config.json",
+ wo_mode="overwrite",
+ )
+
+ sto = lh5.LH5Store()
+ obj, _ = sto.read("/geds/hit", outfile)
+ assert list(obj.keys()) == [
+ "is_valid_rt",
+ "is_valid_t0",
+ "is_valid_tmax",
+ "aggr1",
+ "aggr2",
+ ]
+
+ df = sto.read("geds/hit", outfile)[0].view_as("pd")
+
+    # aggr1 consists of 3 bits --> max value is 7;
+    # aggr2 consists of 2 bits --> max value is 3
+ assert not (df["aggr1"] > 7).any()
+ assert not (df["aggr2"] > 3).any()
+
+ def get_bit(x, n):
+ """bit numbering from right to left, starting with bit 0"""
+ return x & (1 << n) != 0
+
+ df["bit0_check"] = df.apply(lambda row: get_bit(row["aggr1"], 0), axis=1)
+ are_identical = df["bit0_check"].equals(df.is_valid_rt)
+ assert are_identical
+
+ df["bit1_check"] = df.apply(lambda row: get_bit(row["aggr1"], 1), axis=1)
+ are_identical = df["bit1_check"].equals(df.is_valid_t0)
+ assert are_identical
+
+ df["bit2_check"] = df.apply(lambda row: get_bit(row["aggr1"], 2), axis=1)
+ are_identical = df["bit2_check"].equals(df.is_valid_tmax)
+ assert are_identical
def test_build_hit_spms_basic(dsp_test_file_spm, tmptestdir):
@@ -107,9 +171,9 @@ def test_build_hit_spms_basic(dsp_test_file_spm, tmptestdir):
hit_config=f"{config_dir}/spms-hit-config.json",
wo_mode="overwrite_file",
)
- assert ls(out_file) == ["ch0", "ch1", "ch2"]
- assert ls(out_file, "ch0/") == ["ch0/hit"]
- assert ls(out_file, "ch0/hit/") == [
+ assert lh5.ls(out_file) == ["ch0", "ch1", "ch2"]
+ assert lh5.ls(out_file, "ch0/") == ["ch0/hit"]
+ assert lh5.ls(out_file, "ch0/hit/") == [
"ch0/hit/energy_in_pe",
"ch0/hit/quality_cut",
"ch0/hit/trigger_pos",
@@ -134,9 +198,9 @@ def test_build_hit_spms_multiconfig(dsp_test_file_spm, tmptestdir):
lh5_tables_config=f"{tmptestdir}/spms-hit-multi-config.json",
wo_mode="overwrite",
)
- assert ls(out_file) == ["ch0", "ch1", "ch2"]
- assert ls(out_file, "ch0/") == ["ch0/hit"]
- assert ls(out_file, "ch0/hit/") == [
+ assert lh5.ls(out_file) == ["ch0", "ch1", "ch2"]
+ assert lh5.ls(out_file, "ch0/") == ["ch0/hit"]
+ assert lh5.ls(out_file, "ch0/hit/") == [
"ch0/hit/energy_in_pe",
"ch0/hit/quality_cut",
"ch0/hit/trigger_pos",
@@ -152,22 +216,23 @@ def test_build_hit_spms_calc(dsp_test_file_spm, tmptestdir):
wo_mode="overwrite_file",
lh5_tables_config=f"{config_dir}/spms-hit-a-config.json",
)
- assert ls(out_file) == ["ch0", "ch1", "ch2"]
- assert ls(out_file, "ch0/") == ["ch0/hit"]
- assert ls(out_file, "ch0/hit/") == ["ch0/hit/energy_in_pe"]
-
- df0 = store.load_nda(out_file, ["energy_in_pe"], "ch0/hit/")
- df1 = store.load_nda(out_file, ["energy_in_pe"], "ch1/hit/")
- df2 = store.load_nda(out_file, ["energy_in_pe"], "ch2/hit/")
-
- assert len(df0["energy_in_pe"]) == 5
- assert len(df1["energy_in_pe"]) == 5
- assert len(df2["energy_in_pe"]) == 5
-
- assert len(df0["energy_in_pe"][0]) == 20
- assert len(df1["energy_in_pe"][0]) == 20
- assert len(df2["energy_in_pe"][0]) == 20
-
- assert np.nanmean(df0["energy_in_pe"]) == 0
- assert np.nanmean(df1["energy_in_pe"]) == 1
- assert np.nanmean(df2["energy_in_pe"]) == 2
+ assert lh5.ls(out_file) == ["ch0", "ch1", "ch2"]
+ assert lh5.ls(out_file, "ch0/") == ["ch0/hit"]
+ assert lh5.ls(out_file, "ch0/hit/") == ["ch0/hit/energy_in_pe"]
+
+ store = lh5.LH5Store()
+ df0 = store.read("ch0/hit/energy_in_pe", out_file)[0].view_as("np")
+ df1 = store.read("ch1/hit/energy_in_pe", out_file)[0].view_as("np")
+ df2 = store.read("ch2/hit/energy_in_pe", out_file)[0].view_as("np")
+
+ assert len(df0) == 5
+ assert len(df1) == 5
+ assert len(df2) == 5
+
+ assert len(df0[0]) == 20
+ assert len(df1[0]) == 20
+ assert len(df2[0]) == 20
+
+ assert np.nanmean(df0) == 0
+ assert np.nanmean(df1) == 1
+ assert np.nanmean(df2) == 2
diff --git a/tests/skm/configs/basic-skm-config.json b/tests/skm/configs/basic-skm-config.json
new file mode 100644
index 000000000..e1ffda941
--- /dev/null
+++ b/tests/skm/configs/basic-skm-config.json
@@ -0,0 +1,25 @@
+{
+ "multiplicity": 3,
+ "operations": {
+ "timestamp": {
+ "forward_field": "evt.timestamp",
+ "lgdo_attrs": { "info": "pk was here" }
+ },
+ "energy_sum": {
+ "forward_field": "evt.energy_sum"
+ },
+ "multiplicity": {
+ "forward_field": "evt.multiplicity"
+ },
+ "energy": {
+ "forward_field": "hit.cuspEmax_ctc_cal",
+ "missing_value": 0.0,
+ "tcm_idx": "evt.energy_idx"
+ },
+ "energy_id": {
+ "forward_field": "tcm.array_id",
+ "missing_value": 0,
+ "tcm_idx": "evt.energy_idx"
+ }
+ }
+}
diff --git a/tests/skm/test_build_skm.py b/tests/skm/test_build_skm.py
new file mode 100644
index 000000000..c60c460f0
--- /dev/null
+++ b/tests/skm/test_build_skm.py
@@ -0,0 +1,128 @@
+import os
+from pathlib import Path
+
+import awkward as ak
+import lgdo
+from lgdo.lh5 import LH5Store
+
+from pygama.evt import build_evt
+from pygama.skm import build_skm
+
+config_dir = Path(__file__).parent / "configs"
+evt_config_dir = Path(__file__).parent.parent / "evt" / "configs"
+store = LH5Store()
+
+
+def test_basics(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{evt_config_dir}/vov-test-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+
+ skm_conf = f"{config_dir}/basic-skm-config.json"
+ skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5"
+
+ result = build_skm(
+ outfile,
+ lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ lgnd_test_data.get_path(tcm_path),
+ skm_conf,
+ )
+
+ assert isinstance(result, lgdo.Table)
+
+ build_skm(
+ outfile,
+ lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ lgnd_test_data.get_path(tcm_path),
+ skm_conf,
+ skm_out,
+ wo_mode="o",
+ )
+
+ assert os.path.exists(skm_out)
+ obj, _ = store.read("/skm/", skm_out)
+
+ assert obj == result
+
+ df = obj.view_as("pd")
+ assert "timestamp" in df.keys()
+ assert "energy_0" in df.keys()
+ assert "energy_1" in df.keys()
+ assert "energy_2" in df.keys()
+ assert "energy_id_0" in df.keys()
+ assert "energy_id_1" in df.keys()
+ assert "energy_id_2" in df.keys()
+ assert "multiplicity" in df.keys()
+ assert "energy_sum" in df.keys()
+ assert (df.multiplicity.to_numpy() <= 3).all()
+ assert (
+ df.energy_0.to_numpy() + df.energy_1.to_numpy() + df.energy_2.to_numpy()
+ == df.energy_sum.to_numpy()
+ ).all()
+
+ vov_eid = ak.to_numpy(
+ ak.fill_none(
+ ak.pad_none(
+ store.read("/evt/energy_id", outfile)[0].view_as("ak"), 3, clip=True
+ ),
+ 0,
+ ),
+ allow_missing=False,
+ )
+ assert (vov_eid[:, 0] == df.energy_id_0.to_numpy()).all()
+ assert (vov_eid[:, 1] == df.energy_id_1.to_numpy()).all()
+ assert (vov_eid[:, 2] == df.energy_id_2.to_numpy()).all()
+
+
+def test_attribute_passing(lgnd_test_data, tmptestdir):
+ outfile = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_evt.lh5"
+ tcm_path = "lh5/prod-ref-l200/generated/tier/tcm/phy/p03/r001/l200-p03-r001-phy-20230322T160139Z-tier_tcm.lh5"
+ if os.path.exists(outfile):
+ os.remove(outfile)
+
+ build_evt(
+ f_tcm=lgnd_test_data.get_path(tcm_path),
+ f_dsp=lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ f_hit=lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ evt_config=f"{evt_config_dir}/vov-test-evt-config.json",
+ f_evt=outfile,
+ wo_mode="o",
+ evt_group="evt",
+ hit_group="hit",
+ dsp_group="dsp",
+ tcm_group="hardware_tcm_1",
+ )
+
+ skm_conf = f"{config_dir}/basic-skm-config.json"
+
+ skm_out = f"{tmptestdir}/l200-p03-r001-phy-20230322T160139Z-tier_skm.lh5"
+
+ build_skm(
+ outfile,
+ lgnd_test_data.get_path(tcm_path.replace("tcm", "hit")),
+ lgnd_test_data.get_path(tcm_path.replace("tcm", "dsp")),
+ lgnd_test_data.get_path(tcm_path),
+ skm_conf,
+ f_skm=skm_out,
+ wo_mode="o",
+ )
+
+ assert os.path.exists(skm_out)
+ assert "info" in store.read("/skm/timestamp", skm_out)[0].getattrs().keys()
+ assert store.read("/skm/timestamp", skm_out)[0].getattrs()["info"] == "pk was here"