diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d47b91c28ae..c714522e019 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,7 +99,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/ctapipe - key: ctapipe-test-data + key: ctapipe-test-data-desy - name: Prepare mamba installation if: matrix.install-method == 'mamba' && contains(github.event.pull_request.labels.*.name, 'documentation-only') == false @@ -207,9 +207,5 @@ jobs: git describe --tags python -c 'import ctapipe; print(ctapipe.__version__)' - - name: Produce Changelog - run: | - towncrier build --yes - - name: Build docs run: make doc diff --git a/.github/workflows/sonar.yml b/.github/workflows/sonar.yml index 31f98209279..56addb1ab98 100644 --- a/.github/workflows/sonar.yml +++ b/.github/workflows/sonar.yml @@ -19,12 +19,26 @@ jobs: runs-on: ubuntu-latest if: github.event.workflow_run.conclusion == 'success' steps: - - uses: actions/checkout@v4 + - name: Checkout branch or tag + uses: actions/checkout@v4 + if: github.event.workflow_run.event != 'pull_request' with: repository: ${{ github.event.workflow_run.head_repository.full_name }} ref: ${{ github.event.workflow_run.head_branch }} fetch-depth: 0 + - name: Checkout Pull Request merge result + uses: actions/checkout@v4 + if: github.event.workflow_run.event == 'pull_request' + with: + ref: refs/pull/${{ github.event.workflow_run.pull_requests[0].number }}/merge + fetch-depth: 0 + + - name: Git info + run: | + git status + git log -n 5 --oneline + - name: 'Download code coverage' uses: actions/github-script@v7 with: diff --git a/.mailmap b/.mailmap index 2db728f1b2a..de3602bba5d 100644 --- a/.mailmap +++ b/.mailmap @@ -99,3 +99,5 @@ Vadym Voitsekhovskyi Tjark Miener Michael Punch + +Christian Arauner <69584019+ChAr-De@users.noreply.github.com> diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 6ffd378669e..a336585d0ae 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,7 +1,7 @@ version: 2 build: - os: ubuntu-22.04 + os: ubuntu-24.04 apt_packages: - ffmpeg - graphviz @@ -14,3 +14,6 @@ python: path: . extra_requirements: - docs + +sphinx: + configuration: docs/conf.py diff --git a/AUTHORS b/AUTHORS index 0d84602e6c6..4157d82bed1 100644 --- a/AUTHORS +++ b/AUTHORS @@ -6,8 +6,8 @@ Jean Jacquemier Tino Michael Lukas Nickel Noah Biederbeck -Tomas Bylund Lukas Beiske +Tomas Bylund Georg Schwefer Jonas Hackfeld Christoph Toennis @@ -43,19 +43,20 @@ Thomas Gasparetto Ruben Lopez-Coto Orel Gueta Tristan Carel +Daniel Morcuende Daniel Nieto Michael Punch Miguel Nievas Arnau Aguasca-Cabot Christophe COSSOU Cyril Alispach -Daniel Morcuende Julien Lefaucheur Konstantin Pfrang Moritz Hütten Thomas Armstrong Alice Donini <40267450+adonini@users.noreply.github.com> Andrés Baquero +Christian Arauner David Landriu Gernot Maier Henning Ptaszyk diff --git a/CHANGES.rst b/CHANGES.rst index e2fa7538ad0..1295264f159 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,59 @@ +ctapipe v0.23.2 (2025-01-21) +============================ + +Bug Fixes +--------- + +- Fill ``ctapipe.containers.SimulatedCameraContainer.true_image_sum`` in + ``HDF5EventSource``. Always returned default value of -1 before the fix. [`#2680 `__] + +Maintenance +----------- + +- Add compatibility with numpy>=2.1. [`#2682 `__] + + +ctapipe v0.23.1 (2024-12-04) +============================ + +Bug Fixes +--------- + +- Fix ``_uncert`` calculations in ``ctapipe.reco.StereoMeanCombiner``. 
Add helper functions for vectorized numpy calculations as a new
+  ``ctapipe.reco.telescope_event_handling`` module. [`#2658 `__]
+
+- Fix an error in ``ctapipe-process`` that occurred when, in the middle of a
+  simtel file with true images available, a telescope event was missing the
+  true image. This can rarely happen when a telescope triggers on pure NSB or
+  is so oversaturated that the true photoelectrons did not fit into the
+  memory constraints.
+
+  The error was due to the ``DataWriter`` trying to write a ``None`` into an
+  already set-up table for the true images.
+
+  The ``SimTelEventSource`` will now create an invalid true image filled with ``-1``
+  for such events. [`#2659 `__]
+
+- In ``SimTelEventSource``, ignore telescope events that did not take part in the stereo event trigger.
+  This happens rarely in Prod6 files in conjunction with the random mono trigger system.
+
+- Fix the order in which ``Tool`` runs its final operations, so that
+  provenance is recorded correctly. [`#2662 `__]
+
+- Fix the data type of ``tel_id`` in the output of ``SubarrayDescription.to_table``.
+
+- Fixed a bug where a configuration file with an unknown file extension passed
+  to a tool, e.g. ``--config myconf.conf`` instead of ``--config myconf.yaml``,
+  was silently ignored, despite an info log saying "Loading config file
+  myconf.conf". Configuration files must now have one of the following extensions
+  to be recognized: yml, yaml, toml, json, py. If not, a ``ToolConfigurationError``
+  is raised. [`#2666 `__]
+
+Maintenance
+-----------
+
+- Add support for astropy 7.0. [`#2639 `__]
+- Change data server for test datasets from in2p3 to a DESY-hosted server. [`#2664 `__]
+
 ctapipe v0.23.0 (2024-11-18)
 ============================
diff --git a/README.rst b/README.rst
index 03d86e6b102..141bc442dee 100644
--- a/README.rst
+++ b/README.rst
@@ -2,14 +2,14 @@
 ctapipe |pypi| |conda| |doilatest| |ci| |sonarqube_coverage| |sonarqube_quality|
 ==================================================================================
 
-.. |ci| image:: https://github.com/cta-observatory/ctapipe/workflows/CI/badge.svg?branch=main
-   :target: https://github.com/cta-observatory/ctapipe/actions?query=workflow%3ACI+branch%3Amain
+.. |ci| image:: https://github.com/cta-observatory/ctapipe/actions/workflows/ci.yml/badge.svg?branch=main
+   :target: https://github.com/cta-observatory/ctapipe/actions/workflows/ci.yml
    :alt: Test Status
 
 .. |sonarqube_quality| image:: https://sonar-cta-dpps.zeuthen.desy.de/api/project_badges/measure?project=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs&metric=alert_status&token=sqb_a533204f382b350568e922385cab7c2394587458
    :target: https://sonar-cta-dpps.zeuthen.desy.de/dashboard?id=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs
    :alt: sonarqube quality gate
 
 .. |sonarqube_coverage| image:: https://sonar-cta-dpps.zeuthen.desy.de/api/project_badges/measure?project=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs&metric=coverage&token=sqb_a533204f382b350568e922385cab7c2394587458
-   :target: https://sonar-cta-dpps.zeuthen.desy.de/api/project_badges/measure?project=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs&metric=coverage&token=sqb_a533204f382b350568e922385cab7c2394587458)](https://sonar-cta-dpps.zeuthen.desy.de/dashboard?id=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs
+   :target: https://sonar-cta-dpps.zeuthen.desy.de/component_measures?metric=coverage&selected=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs%3Asrc%2Fctapipe&view=treemap&id=cta-observatory_ctapipe_AY52EYhuvuGcMFidNyUs
    :alt: sonarqube code coverage
 
..
|conda| image:: https://anaconda.org/conda-forge/ctapipe/badges/version.svg :target: https://anaconda.org/conda-forge/ctapipe diff --git a/docs/_static/switcher.json b/docs/_static/switcher.json index 01785099814..738a0560f3b 100644 --- a/docs/_static/switcher.json +++ b/docs/_static/switcher.json @@ -10,9 +10,9 @@ "url": "https://ctapipe.readthedocs.io/en/stable/" }, { - "name": "v0.23.0", - "version": "v0.23.0", - "url": "https://ctapipe.readthedocs.io/en/v0.23.0/" + "name": "v0.23.2", + "version": "v0.23.2", + "url": "https://ctapipe.readthedocs.io/en/v0.23.2/" }, { "name": "v0.22.0", diff --git a/docs/changelog.rst b/docs/changelog.rst index 2120f0791ea..818d3a60154 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,4 +2,8 @@ Change Log ********** -.. include:: ../CHANGES.rst + +.. changelog:: + :towncrier: ../ + :changelog_file: ../CHANGES.rst + :towncrier-skip-if-empty: diff --git a/docs/changes/2639.feature.rst b/docs/changes/2639.feature.rst deleted file mode 100644 index fbf57b791c7..00000000000 --- a/docs/changes/2639.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Add support for astropy 7.0. diff --git a/docs/conf.py b/docs/conf.py index 378c02c3342..9a9d9866d81 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,6 +60,7 @@ "sphinx_design", "IPython.sphinxext.ipython_console_highlighting", "sphinx_gallery.gen_gallery", + "sphinx_changelog", ] diff --git a/docs/developer-guide/maintainer-info.rst b/docs/developer-guide/maintainer-info.rst index 05b84fd2bb6..45b60ff6c8a 100644 --- a/docs/developer-guide/maintainer-info.rst +++ b/docs/developer-guide/maintainer-info.rst @@ -37,30 +37,31 @@ The docs are automatically built and deployed using readthedocs. How To Make a Release? ====================== -1. Open a new pull request to prepare the release. +#. Open a new pull request to prepare the release. This should be the last pull request to be merged before making the actual release. - Run ``towncrier`` in to render the changelog: + #. Run ``towncrier`` to render the changelog: - .. code-block:: console + .. code-block:: console - $ git fetch - $ git switch -c prepare_ origin/main - $ towncrier build --version= + $ git fetch + $ git switch -c prepare_ origin/main + $ towncrier build --version= - Add the planned new version to the ``docs/_static/switcher.json`` file, so it will be - available from the version dropdown once the documentation is built. + #. Add the planned new version to the ``docs/_static/switcher.json`` file, so it will be + available from the version dropdown once the documentation is built. - Update the ``AUTHORS`` file using the following command: + #. Update the ``AUTHORS`` file using the following command: - .. code-block:: console + .. code-block:: console - $ bash -c "git shortlog -sne | grep -v dependabot | sed -E 's/^\s+[0-9]+\s+//' > AUTHORS" + $ bash -c "git shortlog -sne | grep -v dependabot | sed -E 's/^\s+[0-9]+\s+//' > AUTHORS" -2. Create a new github release, a good starting point should already be made by the - release drafter plugin. +#. Create a new GitHub release. + A good starting point should already be made by the release drafter plugin. -3. The PyPI upload will be done automatically by Github Actions. +#. The PyPI upload will be done automatically by GitHub Actions. -4. conda packages are built by conda-forge, the recipe is maintained here: https://github.com/conda-forge/ctapipe-feedstock/ - A pull request to update the recipe should be opened automatically by a conda-forge bot when a new version is published to PyPi. 
This can take a couple of hours. +#. conda packages are built by conda-forge, the recipe is maintained here: https://github.com/conda-forge/ctapipe-feedstock/. + A pull request to update the recipe should be opened automatically by a conda-forge bot when a new version is published to PyPI. This can take a couple of hours. + You can make it manually to speed things up. diff --git a/docs/developer-guide/style-guide.rst b/docs/developer-guide/style-guide.rst index e1493817bd1..3dbfa9ad7c6 100644 --- a/docs/developer-guide/style-guide.rst +++ b/docs/developer-guide/style-guide.rst @@ -23,7 +23,7 @@ All functions, classes, and modules should contain appropriate API documentation in their *docstrings*. The *docstrings* should be written in ReStructuredText format (same as the Sphinx high-level documentation), and should follow the `NumPy Docstring Standards -`_ +`_ Documentation for all algorithms should contain citations to external works, which should be collected in ``bibliography.rst``. An example of diff --git a/docs/references.bib b/docs/references.bib index dde248295fb..b6c2711fb0b 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -156,3 +156,17 @@ @manual{ctao-software-standards institution={CTAO}, url={https://redmine.cta-observatory.org/dmsf/files/8628/view}, } + +@article{lst1-crab-paper, + doi = {10.3847/1538-4357/ace89d}, + url = {https://dx.doi.org/10.3847/1538-4357/ace89d}, + year = {2023}, + month = {oct}, + publisher = {The American Astronomical Society}, + volume = {956}, + number = {2}, + pages = {80}, + author = {LST}, + title = {Observations of the Crab Nebula and Pulsar with the Large-sized Telescope Prototype of the Cherenkov Telescope Array}, + journal = {The Astrophysical Journal}, +} diff --git a/environment.yml b/environment.yml index 23622147621..73149a38db5 100644 --- a/environment.yml +++ b/environment.yml @@ -40,6 +40,7 @@ dependencies: - pydata-sphinx-theme - sphinx-design - sphinxcontrib-bibtex + - sphinx_changelog - tomli - towncrier - tqdm diff --git a/pyproject.toml b/pyproject.toml index 1e0dd60d651..27c6cf0b7fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ docs = [ "sphinx-gallery >= 0.16.0", "sphinx_automodapi", "sphinxcontrib-bibtex", + "sphinx-changelog", "tomli; python_version < '3.11'", ] diff --git a/src/ctapipe/calib/camera/calibrator.py b/src/ctapipe/calib/camera/calibrator.py index 3c7683da24d..f367856ccfe 100644 --- a/src/ctapipe/calib/camera/calibrator.py +++ b/src/ctapipe/calib/camera/calibrator.py @@ -204,7 +204,7 @@ def _calibrate_dl0(self, event, tel_id): dl0_pixel_status = r1.pixel_status.copy() # set dvr pixel bit in pixel_status for pixels kept by DVR - dl0_pixel_status[signal_pixels] |= PixelStatus.DVR_STORED_AS_SIGNAL + dl0_pixel_status[signal_pixels] |= np.uint8(PixelStatus.DVR_STORED_AS_SIGNAL) # unset dvr bits for removed pixels dl0_pixel_status[~signal_pixels] &= ~np.uint8(PixelStatus.DVR_STATUS) diff --git a/src/ctapipe/coordinates/tests/test_impact_distance.py b/src/ctapipe/coordinates/tests/test_impact_distance.py index c5093361cee..fb7b044c15c 100644 --- a/src/ctapipe/coordinates/tests/test_impact_distance.py +++ b/src/ctapipe/coordinates/tests/test_impact_distance.py @@ -75,7 +75,7 @@ def test_shower_impact_distance(reference_location): assert np.allclose(impact_distances, [0, 1, 2] * u.m) # alt=0 az=0 should be aligned to x-axis (north in ground frame) - # therefore the distances should also be just the y-offset of the telecope + # therefore the distances should also be just the y-offset of the 
telescope
     shower_geom = ReconstructedGeometryContainer(
         core_x=0 * u.m, core_y=0 * u.m, alt=0 * u.deg, az=0 * u.deg
     )
@@ -83,7 +83,7 @@ def test_shower_impact_distance(reference_location):
     assert np.allclose(impact_distances, [0, 1, 2] * u.m)
 
     # alt=0 az=90 should be aligned to y-axis (east in ground frame)
-    # therefore the distances should also be just the x-offset of the telecope
+    # therefore the distances should also be just the x-offset of the telescope
     shower_geom = ReconstructedGeometryContainer(
         core_x=0 * u.m, core_y=0 * u.m, alt=0 * u.deg, az=90 * u.deg
     )
diff --git a/src/ctapipe/core/tests/test_tool.py b/src/ctapipe/core/tests/test_tool.py
index a9322eb514b..d431304814f 100644
--- a/src/ctapipe/core/tests/test_tool.py
+++ b/src/ctapipe/core/tests/test_tool.py
@@ -1,7 +1,10 @@
 import json
 import logging
 import os
+import signal
+import sys
 import tempfile
+from multiprocessing import Barrier, Process
 from pathlib import Path
 
 import pytest
@@ -487,3 +490,113 @@ class MyTool(Tool):
     assert len(inputs) == 1
     assert inputs[0]["role"] == "Tool Configuration"
     assert inputs[0]["url"] == str(config_path)
+
+
+@pytest.mark.parametrize(
+    ("exit_code", "expected_status"),
+    [
+        (0, "completed"),
+        (None, "completed"),
+        (1, "error"),
+        (2, "error"),
+    ],
+)
+def test_exit_status(exit_code, expected_status, tmp_path, provenance):
+    """check that the tool exit status is correctly recorded in the provenance"""
+
+    class MyTool(Tool):
+        exit_code = Int(allow_none=True, default_value=None).tag(config=True)
+
+        def start(self):
+            if self.exit_code is None:
+                return
+
+            if self.exit_code == 0:
+                sys.exit(0)
+
+            if self.exit_code == 1:
+                raise ValueError("Some error happened")
+
+            class CustomError(ValueError):
+                exit_code = self.exit_code
+
+            raise CustomError("Some error with specific code happened")
+
+    provenance_path = tmp_path / "provlog.json"
+    run_tool(
+        MyTool(exit_code=exit_code),
+        [f"--provenance-log={provenance_path}"],
+        raises=False,
+    )
+
+    activities = json.loads(provenance_path.read_text())
+    assert len(activities) == 1
+    provlog = activities[0]
+    assert provlog["status"] == expected_status
+
+
+class InterruptTestTool(Tool):
+    name = "test-interrupt"
+
+    def __init__(self, barrier):
+        super().__init__()
+        self.barrier = barrier
+
+    def start(self):
+        self.barrier.wait()
+        signal.pause()
+
+
+def test_exit_status_interrupted(tmp_path, provenance):
+    """check that an interrupt (SIGINT) is recorded as status "interrupted" in the provenance"""
+
+    # to make sure we only kill the process once it is running
+    barrier = Barrier(2)
+    tool = InterruptTestTool(barrier)
+
+    provenance_path = tmp_path / "provlog.json"
+    args = [f"--provenance-log={provenance_path}", "--log-level=INFO"]
+    process = Process(target=run_tool, args=(tool, args), kwargs=dict(raises=False))
+    process.start()
+    barrier.wait()
+
+    # process.terminate()
+    os.kill(process.pid, signal.SIGINT)
+    process.join()
+
+    activities = json.loads(provenance_path.read_text())
+    assert len(activities) == 1
+    provlog = activities[0]
+    assert provlog["activity_name"] == InterruptTestTool.name
+    assert provlog["status"] == "interrupted"
+
+
+def test_no_ignore_bad_config_type(tmp_path: Path):
+    """Check that if an unknown type of config file is given, an error is raised
+    instead of silently ignoring the file (which is the default for
+    traitlets.config)
+    """
+
+    class SomeTool(Tool):
+        float_option = Float(1.0, help="An option to change").tag(config=True)
+
+    test_config_file = """
+    SomeTool:
+        float_option: 200.0
+    """
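+    # Tool.load_config_file only accepts the suffixes yml, yaml, toml, json
+    # and py; any other suffix raises ToolConfigurationError (checked below)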
+    bad_conf_path = tmp_path / "test.conf"  # note named "conf" not yaml.
+    bad_conf_path.write_text(test_config_file)
+
+    good_conf_path = tmp_path / "test.yaml"
+    good_conf_path.write_text(test_config_file)
+
+    tool = SomeTool()
+
+    # here we should receive an error.
+    with pytest.raises(ToolConfigurationError):
+        tool.load_config_file(bad_conf_path)
+
+    # test correct case:
+    tool.load_config_file(good_conf_path)
+    assert tool.float_option > 1
diff --git a/src/ctapipe/core/tool.py b/src/ctapipe/core/tool.py
index e484c6ef71d..488dd1242b2 100644
--- a/src/ctapipe/core/tool.py
+++ b/src/ctapipe/core/tool.py
@@ -282,9 +282,16 @@ def load_config_file(self, path: str | pathlib.Path) -> None:
             with open(path, "rb") as infile:
                 config = Config(toml.load(infile))
             self.update_config(config)
-        else:
-            # fall back to traitlets.config.Application's implementation
+        elif path.suffix in [".json", ".py"]:
+            # fall back to traitlets.config.Application's implementation. Note
+            # that if we don't specify the file suffixes here, traitlets seems
+            # to silently ignore unknown ones.
             super().load_config_file(str(path))
+        else:
+            raise ToolConfigurationError(
+                f"The config file '{path}' is not in a known format. "
+                "The file should end in one of: yml, yaml, toml, json, py"
+            )
 
         Provenance().add_input_file(path, role="Tool Configuration", add_meta=False)
 
@@ -405,6 +412,7 @@ def run(self, argv=None, raises=False):
 
         # return codes are taken from:
         # https://tldp.org/LDP/abs/html/exitcodes.html
+        status = "completed"
         exit_status = 0
         current_exception = None
 
@@ -430,51 +438,42 @@ def run(self, argv=None, raises=False):
 
             self.start()
             self.finish()
-            self.log.info("Finished: %s", self.name)
-            Provenance().finish_activity(activity_name=self.name)
         except (ToolConfigurationError, TraitError) as err:
             current_exception = err
             self.log.error("%s", err)
             self.log.error("Use --help for more info")
             exit_status = 2  # wrong cmd line parameter
-            Provenance().finish_activity(
-                activity_name=self.name, status="error", exit_code=exit_status
-            )
+            status = "error"
         except KeyboardInterrupt:
             self.log.warning("WAS INTERRUPTED BY CTRL-C")
             exit_status = 130  # Script terminated by Control-C
-            Provenance().finish_activity(
-                activity_name=self.name, status="interrupted", exit_code=exit_status
-            )
+            status = "interrupted"
         except Exception as err:
             current_exception = err
             exit_status = getattr(err, "exit_code", 1)
+            status = "error"
             self.log.exception("Caught unexpected exception: %s", err)
-            Provenance().finish_activity(
-                activity_name=self.name, status="error", exit_code=exit_status
-            )
         except SystemExit as err:
             exit_status = err.code
-            if exit_status == 0:
-                # Finish normally
-                Provenance().finish_activity(activity_name=self.name)
-            else:
-                # Finish with error
+            if exit_status != 0:
+                status = "error"
                 current_exception = err
                 self.log.critical(
                     "Caught SystemExit with exit code %s", exit_status
                 )
-                Provenance().finish_activity(
-                    activity_name=self.name,
-                    status="error",
-                    exit_code=exit_status,
-                )
         finally:
-            if not {"-h", "--help", "--help-all"}.intersection(self.argv):
-                self.write_provenance()
             if raises and current_exception:
+                self.write_provenance()
                 raise current_exception
 
+            Provenance().finish_activity(
+                activity_name=self.name, status=status, exit_code=exit_status
+            )
+
+            if not {"-h", "--help", "--help-all"}.intersection(self.argv):
+                self.write_provenance()
+
+            self.log.info("Finished %s", self.name)
             self.exit(exit_status)
 
     def write_provenance(self):
diff --git a/src/ctapipe/image/__init__.py b/src/ctapipe/image/__init__.py
index 61799ac01a5..16f1171f501 100644
--- a/src/ctapipe/image/__init__.py
+++ b/src/ctapipe/image/__init__.py @@ -1,11 +1,15 @@ from .cleaning import ( ImageCleaner, + NSBImageCleaner, TailcutsImageCleaner, apply_time_delta_cleaning, + bright_cleaning, dilate, fact_image_cleaning, mars_cleaning_1st_pass, + nsb_image_cleaning, tailcuts_clean, + time_constrained_clean, ) from .concentration import concentration_parameters from .extractor import ( @@ -18,6 +22,7 @@ NeighborPeakWindowSum, SlidingWindowMaxSum, TwoPassWindowSum, + VarianceExtractor, extract_around_peak, extract_sliding_window, integration_correction, @@ -79,12 +84,16 @@ "largest_island", "brightest_island", "tailcuts_clean", + "bright_cleaning", "dilate", "mars_cleaning_1st_pass", + "nsb_image_cleaning", "fact_image_cleaning", "apply_time_delta_cleaning", + "time_constrained_clean", "ImageCleaner", "TailcutsImageCleaner", + "NSBImageCleaner", "neg_log_likelihood_approx", "neg_log_likelihood_numeric", "neg_log_likelihood", @@ -109,6 +118,7 @@ "NeighborPeakWindowSum", "BaselineSubtractedNeighborPeakWindowSum", "TwoPassWindowSum", + "VarianceExtractor", "extract_around_peak", "extract_sliding_window", "neighbor_average_maximum", diff --git a/src/ctapipe/image/cleaning.py b/src/ctapipe/image/cleaning.py index 328fb4be1f3..95c200bf314 100644 --- a/src/ctapipe/image/cleaning.py +++ b/src/ctapipe/image/cleaning.py @@ -14,14 +14,20 @@ __all__ = [ "tailcuts_clean", + "bright_cleaning", "dilate", "mars_cleaning_1st_pass", "fact_image_cleaning", "apply_time_delta_cleaning", "apply_time_average_cleaning", "time_constrained_clean", + "nsb_image_cleaning", "ImageCleaner", "TailcutsImageCleaner", + "NSBImageCleaner", + "MARSImageCleaner", + "FACTImageCleaner", + "TimeConstrainedImageCleaner", ] from abc import abstractmethod @@ -60,7 +66,7 @@ def tailcuts_clean( Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information image : np.ndarray pixel charges @@ -79,7 +85,7 @@ def tailcuts_clean( Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. """ pixels_above_picture = image >= picture_thresh @@ -116,7 +122,7 @@ def tailcuts_clean( def bright_cleaning(image, threshold, fraction, n_pixels=3): """ Clean an image by removing pixels below a fraction of the mean charge - in the `n_pixels` brightest pixels. + in the ``n_pixels`` brightest pixels. No pixels are removed instead if the mean charge of the brightest pixels are below a certain threshold. @@ -126,17 +132,17 @@ def bright_cleaning(image, threshold, fraction, n_pixels=3): image : np.ndarray pixel charges threshold : float - Minimum average charge in the `n_pixels` brightest pixels to apply + Minimum average charge in the ``n_pixels`` brightest pixels to apply cleaning fraction : float - Pixels below fraction * (average charge in the `n_pixels` brightest pixels) + Pixels below fraction * (average charge in the ``n_pixels`` brightest pixels) will be removed from the cleaned image n_pixels : int Consider this number of the brightest pixels to calculate the mean charge Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. 
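+
+    Examples
+    --------
+    A minimal usage sketch; the image values here are made up purely for
+    illustration. The mean of the 3 brightest pixels is 20, which is above
+    ``threshold``, so pixels below ``fraction * 20 = 10`` are removed::
+
+        import numpy as np
+
+        image = np.array([0.0, 12.0, 20.0, 28.0, 1.0])
+        mask = bright_cleaning(image, threshold=15, fraction=0.5, n_pixels=3)
+        # mask -> [False, True, True, True, False]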
""" mean_brightest_signal = np.mean(n_largest(n_pixels, image)) @@ -170,7 +176,7 @@ def mars_cleaning_1st_pass( Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information image : np.ndarray pixel charges @@ -178,7 +184,7 @@ def mars_cleaning_1st_pass( threshold above which all pixels are retained boundary_thresh : float threshold above which pixels are retained if - they have a neighbor already above the picture_thresh; it is then + they have a neighbor already above the ``picture_thresh``; it is then reapplied iteratively to the neighbor of the neighbor keep_isolated_pixels : bool If True, pixels above the picture threshold will be included always, @@ -186,11 +192,11 @@ def mars_cleaning_1st_pass( boundary min_number_picture_neighbors : int A picture pixel survives cleaning only if it has at least this number - of picture neighbors. This has no effect in case keep_isolated_pixels is True + of picture neighbors. This has no effect in case ``keep_isolated_pixels`` is True Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. """ pixels_from_tailcuts_clean = tailcuts_clean( @@ -231,7 +237,7 @@ def mars_cleaning_1st_pass( def dilate(geom, mask): """ - Add one row of neighbors to the True values of a pixel mask and return + Add one row of neighbors to the true values of a pixel mask and return the new mask. This can be used to include extra rows of pixels in a mask that was @@ -239,7 +245,7 @@ def dilate(geom, mask): Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information mask : np.ndarray input mask (array of booleans) to be dilated @@ -256,10 +262,10 @@ def apply_time_delta_cleaning( Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information mask : np.ndarray - boolean mask of *clean* pixels before time_delta_cleaning + boolean mask of selected pixels before `apply_time_delta_cleaning` arrival_times : np.ndarray pixel timing information min_number_neighbors : int @@ -270,7 +276,7 @@ def apply_time_delta_cleaning( Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. """ pixels_to_keep = mask.copy() time_diffs = np.abs(arrival_times[mask, None] - arrival_times) @@ -293,22 +299,22 @@ def apply_time_average_cleaning( Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information mask : np.ndarray - boolean mask of *clean* pixels before time_delta_cleaning + boolean mask of selected pixels before `apply_time_delta_cleaning` image : np.ndarray pixel charges arrival_times : np.ndarray pixel timing information picture_thresh : float - threshold above which time limit is extended twice its value + threshold above which ``time_limit`` is extended twice its value time_limit : int | float arrival time limit w.r.t. the average time of the core pixels Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. 
""" mask = mask.copy() if np.count_nonzero(mask) > 0: @@ -348,7 +354,7 @@ def fact_image_cleaning( Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information image : np.ndarray pixel charges @@ -358,7 +364,7 @@ def fact_image_cleaning( threshold above which all pixels are retained boundary_threshold : float | np.ndarray threshold above which pixels are retained if they have a neighbor - already above the picture_thresh + already above the ``picture_thresh`` min_number_neighbors : int Threshold to determine if a pixel survives cleaning steps. These steps include checks of neighbor arrival time and value @@ -367,7 +373,7 @@ def fact_image_cleaning( Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. References ---------- @@ -433,7 +439,7 @@ def time_constrained_clean( Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information image : np.ndarray pixel charges @@ -443,7 +449,7 @@ def time_constrained_clean( threshold above which all pixels are retained boundary_threshold : float | np.ndarray threshold above which pixels are retained if they have a neighbor - already above the picture_thresh + already above the ``picture_thresh`` time_limit_core : int | float arrival time limit of core pixels w.r.t the average time time_limit_boundary : int | float @@ -454,7 +460,7 @@ def time_constrained_clean( Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. """ # find core pixels that pass a picture threshold @@ -518,18 +524,16 @@ def nsb_image_cleaning( Clean an image in 5 Steps: 1) Get pixelwise picture thresholds for `tailcuts_clean` in step 2) from interleaved - pedestal events if `pedestal_std` is not None. - 2) Apply tailcuts image cleaning algorithm - `ctapipe.image.cleaning.tailcuts_clean`. - 3) Apply time_delta_cleaning algorithm - - `ctapipe.image.cleaning.apply_time_delta_cleaning` if time_limit is not None. - 4) Apply bright_cleaning - `ctapipe.image.cleaning.bright_cleaning` if - bright_cleaning_threshold is not None. - 5) Get only largest island - `ctapipe.image.morphology.largest_island` if - `largest_island_only` is set to true. + pedestal events if ``pedestal_std`` is not None. + 2) Apply `tailcuts_clean` algorithm. + 3) Apply `apply_time_delta_cleaning` algorithm if ``time_limit`` is not None. + 4) Apply `bright_cleaning` if ``bright_cleaning_threshold`` is not None. + 5) Get only `ctapipe.image.largest_island` if ``largest_island_only`` is + set to true. Parameters ---------- - geom : ctapipe.instrument.CameraGeometry + geom : `ctapipe.instrument.CameraGeometry` Camera geometry information image : np.ndarray Pixel charges @@ -537,39 +541,39 @@ def nsb_image_cleaning( Pixel timing information picture_thresh_min : float | np.ndarray Defines the minimum value used for the picture threshold for `tailcuts_clean`. - The threshold used will be at least this value, or higher if `pedestal_std` - and `pedestal_factor` are set. + The threshold used will be at least this value, or higher if ``pedestal_std`` + and ``pedestal_factor`` are set. boundary_thresh : float | np.ndarray Threshold above which pixels are retained if they have a neighbor - already above the picture_thresh_min. Used for `tailcuts_clean`. + already above the ``picture_thresh_min``. Used for `tailcuts_clean`. 
min_number_picture_neighbors : int A picture pixel survives cleaning only if it has at least this number - of picture neighbors. This has no effect in case keep_isolated_pixels is True. + of picture neighbors. This has no effect in case ``keep_isolated_pixels`` is True. Used for `tailcuts_clean`. keep_isolated_pixels : bool If True, pixels above the picture threshold will be included always, if not they are only included if a neighbor is in the picture or boundary. Used for `tailcuts_clean`. time_limit : float - Time limit for the `time_delta_cleaning`. Set to None if no `time_delta_cleaning` - should be applied. + Time limit for the `apply_time_delta_cleaning`. Set to None if no + `apply_time_delta_cleaning` should be applied. time_num_neighbors : int - Used for `time_delta_cleaning`. + Used for `apply_time_delta_cleaning`. A selected pixel needs at least this number of (already selected) neighbors - that arrived within a given time_limit to itself to survive this cleaning. + that arrived within a given ``time_limit`` to itself to survive this cleaning. bright_cleaning_n_pixels : int Consider this number of the brightest pixels for calculating the mean charge. - Pixels below fraction * (mean charge in the `bright_cleaning_n_pixels` + Pixels below fraction * (mean charge in the ``bright_cleaning_n_pixels`` brightest pixels) will be removed from the cleaned image. Set - `bright_cleaning_threshold` to None if no `bright_cleaning` should be applied. + ``bright_cleaning_threshold`` to None if no `bright_cleaning` should be applied. bright_cleaning_fraction : float Fraction parameter for `bright_cleaning`. Pixels below - fraction * (mean charge in the `bright_cleaning_n_pixels` brightest pixels) - will be removed from the cleaned image. Set `bright_cleaning_threshold` to None + fraction * (mean charge in the ``bright_cleaning_n_pixels`` brightest pixels) + will be removed from the cleaned image. Set ``bright_cleaning_threshold`` to None if no `bright_cleaning` should be applied. bright_cleaning_threshold : float Threshold parameter for `bright_cleaning`. Minimum mean charge - in the `bright_cleaning_n_pixels` brightest pixels to apply the cleaning. + in the ``bright_cleaning_n_pixels`` brightest pixels to apply the cleaning. Set to None if no `bright_cleaning` should be applied. largest_island_only : bool Set to true to get only largest island. @@ -577,17 +581,17 @@ def nsb_image_cleaning( Factor for interleaved pedestal cleaning. It is multiplied by the pedestal standard deviation for each pixel to calculate pixelwise picture threshold parameters for `tailcuts_clean` considering the current background. - Has no effect if `pedestal_std` is set to None. + Has no effect if ``pedestal_std`` is set to None. pedestal_std : np.ndarray | None Pedestal standard deviation for each pixel. See `ctapipe.containers.PedestalContainer`. Used to calculate pixelwise picture - threshold parameters for `tailcuts_clean` by multiplying it with `pedestal_factor` - and clip (limit) the product with `picture_thresh_min`. If set to None, only - `picture_thresh_min` is used to set the picture threshold for `tailcuts_clean`. + threshold parameters for `tailcuts_clean` by multiplying it with ``pedestal_factor`` + and clip (limit) the product with ``picture_thresh_min``. If set to None, only + ``picture_thresh_min`` is used to set the picture threshold for `tailcuts_clean`. Returns ------- - A boolean mask of *clean* pixels. + A boolean mask of selected pixels. 
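+
+    Examples
+    --------
+    A usage sketch; ``geom``, ``image`` and ``arrival_times`` are assumed to
+    come from a camera event, and the threshold values are illustrative.
+    Passing ``None`` switches the optional steps off, as described above::
+
+        mask = nsb_image_cleaning(
+            geom, image, arrival_times,
+            picture_thresh_min=8, boundary_thresh=4,
+            time_limit=2, time_num_neighbors=1,
+            bright_cleaning_threshold=None,  # skip step 4
+            pedestal_std=None,  # step 1 falls back to picture_thresh_min
+            largest_island_only=False,  # skip step 5
+        )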
""" # Step 1 @@ -638,7 +642,7 @@ def nsb_image_cleaning( class ImageCleaner(TelescopeComponent): """ - Abstract class for all configurable Image Cleaning algorithms. Use + Abstract class for all configurable Image Cleaning algorithms. Use ``ImageCleaner.from_name()`` to construct an instance of a particular algorithm """ @@ -663,9 +667,9 @@ def __call__( image pixel data corresponding to the camera geometry arrival_times : np.ndarray image of arrival time (not used in this method) - monitoring : ctapipe.containers.MonitoringCameraContainer - MonitoringCameraContainer to make use of additional parameters - from monitoring data e.g. pedestal std. + monitoring : `ctapipe.containers.MonitoringCameraContainer` + `ctapipe.containers.MonitoringCameraContainer` to make use of + additional parameters from monitoring data e.g. pedestal std. Returns ------- @@ -723,25 +727,20 @@ def __call__( class NSBImageCleaner(TailcutsImageCleaner): """ - Clean images based on lstchains image cleaning technique [1]_. See - `ctapipe.image.nsb_image_cleaning` - - References - ---------- - .. [1] https://arxiv.org/pdf/2306.12960 - + Clean images based on lstchains image cleaning technique described in + :cite:p:`lst1-crab-paper`. See `ctapipe.image.nsb_image_cleaning`. """ time_limit = FloatTelescopeParameter( default_value=2, - help="Time limit for the `time_delta_cleaning`. Set to None if no" - " `time_delta_cleaning` should be applied.", + help="Time limit for the `apply_time_delta_cleaning`. Set to None if no" + " `apply_time_delta_cleaning` should be applied.", allow_none=True, ).tag(config=True) time_num_neighbors = IntTelescopeParameter( default_value=1, - help="Used for `time_delta_cleaning`." + help="Used for `apply_time_delta_cleaning`." " A selected pixel needs at least this number of (already selected) neighbors" " that arrived within a given `time_limit` to itself to survive this cleaning.", ).tag(config=True) @@ -750,23 +749,23 @@ class NSBImageCleaner(TailcutsImageCleaner): default_value=3, help="Consider this number of the brightest pixels for calculating the" " mean charge. Pixels below fraction * (mean charge in the" - " `bright_cleaning_n_pixels` brightest pixels) will be removed from the" - " cleaned image. Set `bright_cleaning_threshold` to None if no" + " ``bright_cleaning_n_pixels`` brightest pixels) will be removed from the" + " cleaned image. Set ``bright_cleaning_threshold`` to None if no" " `bright_cleaning` should be applied.", ).tag(config=True) bright_cleaning_fraction = FloatTelescopeParameter( default_value=0.03, help="Fraction parameter for `bright_cleaning`. Pixels below" - " fraction * (mean charge in the `bright_cleaning_n_pixels` brightest pixels)" - " will be removed from the cleaned image. Set `bright_cleaning_threshold` to" + " fraction * (mean charge in the ``bright_cleaning_n_pixels`` brightest pixels)" + " will be removed from the cleaned image. Set ``bright_cleaning_threshold`` to" " None if no `bright_cleaning` should be applied.", ).tag(config=True) bright_cleaning_threshold = FloatTelescopeParameter( default_value=267, help="Threshold parameter for `bright_cleaning`. Minimum mean charge" - " in the `bright_cleaning_n_pixels` brightest pixels to apply the cleaning." + " in the ``bright_cleaning_n_pixels`` brightest pixels to apply the cleaning." " Set to None if no `bright_cleaning` should be applied.", allow_none=True, ).tag(config=True) @@ -779,10 +778,10 @@ class NSBImageCleaner(TailcutsImageCleaner): default_value=2.5, help="Factor for interleaved pedestal cleaning. 
It is multiplied by the" " pedestal standard deviation for each pixel to calculate pixelwise upper limit" - " picture thresholds and clip them with `picture_thresh_pe` of" + " picture thresholds and clip them with ``picture_thresh_pe`` of" " `TailcutsImageCleaner` for `tailcuts_clean` considering the current background." " If no pedestal standard deviation is given, interleaved pedestal cleaning is" - " not applied and `picture_thresh_pe` of `TailcutsImageCleaner` is used alone" + " not applied and ``picture_thresh_pe`` of `TailcutsImageCleaner` is used alone" " instead.", ).tag(config=True) @@ -850,11 +849,11 @@ def __call__( class FACTImageCleaner(TailcutsImageCleaner): """ Clean images using the FACT technique. See `ctapipe.image.fact_image_cleaning` - for algorithm details + for algorithm details. """ time_limit_ns = FloatTelescopeParameter( - default_value=5.0, help="arrival time limit for neighboring " "pixels, in ns" + default_value=5.0, help="arrival time limit for neighboring pixels, in ns" ).tag(config=True) def __call__( @@ -880,16 +879,17 @@ def __call__( class TimeConstrainedImageCleaner(TailcutsImageCleaner): """ - MAGIC-like Image cleaner with timing information (See `ctapipe.image.time_constrained_clean`) + MAGIC-like Image cleaner with timing information (See + `ctapipe.image.time_constrained_clean`). """ time_limit_core_ns = FloatTelescopeParameter( default_value=4.5, - help="arrival time limit for neighboring " "core pixels, in ns", + help="arrival time limit for neighboring core pixels, in ns", ).tag(config=True) time_limit_boundary_ns = FloatTelescopeParameter( default_value=1.5, - help="arrival time limit for neighboring " "boundary pixels, in ns", + help="arrival time limit for neighboring boundary pixels, in ns", ).tag(config=True) def __call__( diff --git a/src/ctapipe/instrument/subarray.py b/src/ctapipe/instrument/subarray.py index eafd56c2a95..d2500ab6f33 100644 --- a/src/ctapipe/instrument/subarray.py +++ b/src/ctapipe/instrument/subarray.py @@ -310,7 +310,7 @@ def to_table(self, kind="subarray"): tab = Table( dict( - tel_id=np.array(ids, dtype=np.short), + tel_id=np.array(ids, dtype=np.uint16), name=tel_names, type=tel_types, pos_x=tel_coords.x, diff --git a/src/ctapipe/io/hdf5eventsource.py b/src/ctapipe/io/hdf5eventsource.py index d18f7db436f..8e8cb3ad6b8 100644 --- a/src/ctapipe/io/hdf5eventsource.py +++ b/src/ctapipe/io/hdf5eventsource.py @@ -708,6 +708,7 @@ def _generator(self): if self.has_simulated_dl1: simulated_image_row = next(simulated_image_iterators[key]) simulated.true_image = simulated_image_row["true_image"] + simulated.true_image_sum = simulated_image_row["true_image_sum"] if DataLevel.DL1_PARAMETERS in self.datalevels: # Is there a smarter way to unpack this? diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 45d35b03b51..28bfe5bfbd2 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -83,6 +83,11 @@ "MirrorClass", ] +# default for MAX_PHOTOELECTRONS in simtel_array is 2_500_000 +# so this should be a safe limit to distinguish "dim image true missing" +# from "extremely bright true image missing", see issue #2344 +MISSING_IMAGE_BRIGHTNESS_LIMIT = 1_000_000 + # Mapping of SimTelArray Calibration trigger types to EventType: # from simtelarray: type Dark (0), pedestal (1), in-lid LED (2) or laser/LED (3+) data. 
SIMTEL_TO_CTA_EVENT_TYPE = {
@@ -570,6 +575,7 @@ def __init__(self, input_url=Undefined, config=None, parent=None, **kwargs):
                 self.file_, kind=self.atmosphere_profile_choice
             )
 
+        self._has_true_image = None
         self.log.debug(f"Using gain selector {self.gain_selector}")
 
     def __exit__(self, exc_type, exc_val, exc_tb):
@@ -861,6 +867,20 @@ def _generate_events(self):
                 impact_distances = np.full(len(self.subarray), np.nan) * u.m
 
             for tel_id, telescope_event in telescope_events.items():
+                if tel_id not in trigger.tels_with_trigger:
+                    # skip additional telescopes that have data but did not
+                    # participate in the subarray trigger decision for this stereo event
+                    # see #2660 for details
+                    self.log.warning(
+                        "Encountered telescope event not present in"
+                        " stereo trigger information, skipping."
+                        " event_id = %d, tel_id = %d, tels_with_trigger: %s",
+                        event_id,
+                        tel_id,
+                        trigger.tels_with_trigger,
+                    )
+                    continue
+
                 adc_samples = telescope_event.get("adc_samples")
                 if adc_samples is None:
                     adc_samples = telescope_event["adc_sums"][:, :, np.newaxis]
@@ -871,6 +891,30 @@ def _generate_events(self):
                     .get(tel_id - 1, {})
                     .get("photoelectrons", None)
                 )
+                true_image_sum = true_image_sums[
+                    self.telescope_indices_original[tel_id]
+                ]
+
+                if self._has_true_image is None:
+                    self._has_true_image = true_image is not None
+
+                if self._has_true_image and true_image is None:
+                    if true_image_sum > MISSING_IMAGE_BRIGHTNESS_LIMIT:
+                        self.log.warning(
+                            "Encountered extremely bright telescope event with missing true_image in"
+                            " file that has true images: event_id = %d, tel_id = %d."
+                            " Event might be truncated, skipping telescope event",
+                            event_id,
+                            tel_id,
+                        )
+                        continue
+                    else:
+                        self.log.info(
+                            "telescope event event_id = %d, tel_id = %d is missing true image",
+                            event_id,
+                            tel_id,
+                        )
+                        true_image = np.full(n_pixels, -1, dtype=np.int32)
 
                 if data.simulation is not None:
                     if data.simulation.shower is not None:
@@ -887,9 +931,7 @@ def _generate_events(self):
                     )
 
                 data.simulation.tel[tel_id] = SimulatedCameraContainer(
-                    true_image_sum=true_image_sums[
-                        self.telescope_indices_original[tel_id]
-                    ],
+                    true_image_sum=true_image_sum,
                     true_image=true_image,
                     impact=impact_container,
                 )
@@ -956,8 +998,8 @@ def _get_r1_pixel_status(self, tel_id, selected_gain_channel):
         low_gain_stored = selected_gain_channel == GainChannel.LOW
 
         # set gain bits
-        pixel_status[high_gain_stored] |= PixelStatus.HIGH_GAIN_STORED
-        pixel_status[low_gain_stored] |= PixelStatus.LOW_GAIN_STORED
+        pixel_status[high_gain_stored] |= np.uint8(PixelStatus.HIGH_GAIN_STORED)
+        pixel_status[low_gain_stored] |= np.uint8(PixelStatus.LOW_GAIN_STORED)
 
         # reset gain bits for completely disabled pixels
         disabled = tel_desc["disabled_pixels"]["HV_disabled"]
diff --git a/src/ctapipe/io/tableloader.py b/src/ctapipe/io/tableloader.py
index 2b10ad96506..041f68f5151 100644
--- a/src/ctapipe/io/tableloader.py
+++ b/src/ctapipe/io/tableloader.py
@@ -125,7 +125,9 @@ def _join_telescope_events(table1, table2):
 
 def _merge_table_same_index(table1, table2, index_keys, fallback_join_type="left"):
     """Merge two tables assuming their primary keys are identical"""
     if len(table1) != len(table2):
-        raise ValueError("Tables must have identical length")
+        raise ValueError(
+            f"Tables must have identical length, {len(table1)} != {len(table2)}"
+        )
 
     if len(table1) == 0:
         return table1
diff --git a/src/ctapipe/io/tests/test_datawriter.py b/src/ctapipe/io/tests/test_datawriter.py
index c42dcca6858..f4924b4c3dd 100644
--- a/src/ctapipe/io/tests/test_datawriter.py
+++
b/src/ctapipe/io/tests/test_datawriter.py @@ -201,15 +201,16 @@ def test_roundtrip(tmpdir: Path): # make sure it is readable by the event source and matches the images - for event in EventSource(output_path): - for tel_id, dl1 in event.dl1.tel.items(): - original_image = events[event.count].dl1.tel[tel_id].image - read_image = dl1.image - assert np.allclose(original_image, read_image, atol=0.1) - - original_peaktime = events[event.count].dl1.tel[tel_id].peak_time - read_peaktime = dl1.peak_time - assert np.allclose(original_peaktime, read_peaktime, atol=0.01) + with EventSource(output_path) as source: + for event in source: + for tel_id, dl1 in event.dl1.tel.items(): + original_image = events[event.count].dl1.tel[tel_id].image + read_image = dl1.image + assert np.allclose(original_image, read_image, atol=0.1) + + original_peaktime = events[event.count].dl1.tel[tel_id].peak_time + read_peaktime = dl1.peak_time + assert np.allclose(original_peaktime, read_peaktime, atol=0.01) def test_dl1writer_no_events(tmpdir: Path): diff --git a/src/ctapipe/io/tests/test_hdf5.py b/src/ctapipe/io/tests/test_hdf5.py index 681567068a6..65b9e9088da 100644 --- a/src/ctapipe/io/tests/test_hdf5.py +++ b/src/ctapipe/io/tests/test_hdf5.py @@ -988,19 +988,19 @@ class Container1(Container): # But when explicitly giving the prefixes, this works and order # should not be important - reader = HDF5TableReader(path) - generator = reader.read( - "/values", - [Container1, Container1], - prefixes=["bar", "foo"], - ) + with HDF5TableReader(path) as reader: + generator = reader.read( + "/values", + [Container1, Container1], + prefixes=["bar", "foo"], + ) - for value in (1, 2, 3): - c1, c2 = next(generator) - assert c1.value == 5 * value - assert c1.prefix == "bar" - assert c2.value == value - assert c2.prefix == "foo" + for value in (1, 2, 3): + c1, c2 = next(generator) + assert c1.value == 5 * value + assert c1.prefix == "bar" + assert c2.value == value + assert c2.prefix == "foo" @pytest.mark.parametrize("input_type", (str, Path, tables.File)) diff --git a/src/ctapipe/io/tests/test_simteleventsource.py b/src/ctapipe/io/tests/test_simteleventsource.py index 7296fb7def5..a285160c056 100644 --- a/src/ctapipe/io/tests/test_simteleventsource.py +++ b/src/ctapipe/io/tests/test_simteleventsource.py @@ -653,3 +653,38 @@ def test_provenance(provenance, prod5_gamma_simtel_path): assert len(inputs) == 1 assert inputs[0]["url"] == str(prod5_gamma_simtel_path) assert inputs[0]["reference_meta"] is None + + +def test_prod6_issues(): + """Test behavior of source on file from prod6, see issues #2344 and #2660""" + input_url = "dataset://prod6_issues.simtel.zst" + + events_checked_trigger = set() + events_checked_image = set() + + # events with two telescope events but only one in stereo trigger in simtel + strange_trigger_events = { + 1548602: 3, + 2247909: 32, + 3974908: 2, + 4839806: 1, + } + missing_true_images = {1664106: 32} + + with SimTelEventSource(input_url) as source: + for e in source: + event_id = e.index.event_id + if event_id in strange_trigger_events: + expected_tel_id = strange_trigger_events[event_id] + np.testing.assert_equal(e.trigger.tels_with_trigger, [expected_tel_id]) + assert e.trigger.tel.keys() == {expected_tel_id} + assert e.r1.tel.keys() == {expected_tel_id} + events_checked_trigger.add(event_id) + + if event_id in missing_true_images: + tel_id = missing_true_images[event_id] + np.testing.assert_equal(e.simulation.tel[tel_id].true_image, -1) + events_checked_image.add(event_id) + + assert 
strange_trigger_events.keys() == events_checked_trigger + assert missing_true_images.keys() == events_checked_image diff --git a/src/ctapipe/reco/stereo_combination.py b/src/ctapipe/reco/stereo_combination.py index 19c561d89b2..6261af07476 100644 --- a/src/ctapipe/reco/stereo_combination.py +++ b/src/ctapipe/reco/stereo_combination.py @@ -17,6 +17,7 @@ ReconstructedEnergyContainer, ReconstructedGeometryContainer, ) +from .telescope_event_handling import get_subarray_index, weighted_mean_std_ufunc from .utils import add_defaults_and_meta _containers = { @@ -31,38 +32,6 @@ ] -def _grouped_add(tel_data, n_array_events, indices): - """ - Calculate the group-wise sum for each array event over the - corresponding telescope events. ``indices`` is an array - that gives the index of the subarray event for each telescope event, - returned by - ``np.unique(tel_events[["obs_id", "event_id"]], return_inverse=True)`` - """ - combined_values = np.zeros(n_array_events) - np.add.at(combined_values, indices, tel_data) - return combined_values - - -def _weighted_mean_ufunc(tel_values, weights, n_array_events, indices): - # avoid numerical problems by very large or small weights - weights = weights / weights.max() - sum_prediction = _grouped_add( - tel_values * weights, - n_array_events, - indices, - ) - sum_of_weights = _grouped_add( - weights, - n_array_events, - indices, - ) - mean = np.full(n_array_events, np.nan) - valid = sum_of_weights > 0 - mean[valid] = sum_prediction[valid] / sum_of_weights[valid] - return mean - - class StereoCombiner(Component): """ Base Class for algorithms combining telescope-wise predictions to common prediction. @@ -299,26 +268,26 @@ def predict_table(self, mono_predictions: Table) -> Table: prefix = f"{self.prefix}_tel" # TODO: Integrate table quality query once its done valid = mono_predictions[f"{prefix}_is_valid"] - valid_predictions = mono_predictions[valid] - array_events, indices, multiplicity = np.unique( - mono_predictions[["obs_id", "event_id"]], - return_inverse=True, - return_counts=True, + obs_ids, event_ids, multiplicity, tel_to_array_indices = get_subarray_index( + mono_predictions ) - stereo_table = Table(array_events) + n_array_events = len(obs_ids) + stereo_table = Table({"obs_id": obs_ids, "event_id": event_ids}) # copy metadata for colname in ("obs_id", "event_id"): stereo_table[colname].description = mono_predictions[colname].description - n_array_events = len(array_events) - weights = self._calculate_weights(valid_predictions) + weights = self._calculate_weights(mono_predictions[valid]) if self.property is ReconstructionProperty.PARTICLE_TYPE: - if len(valid_predictions) > 0: - mono_predictions = valid_predictions[f"{prefix}_prediction"] - stereo_predictions = _weighted_mean_ufunc( - mono_predictions, weights, n_array_events, indices[valid] + if np.count_nonzero(valid) > 0: + stereo_predictions, _ = weighted_mean_std_ufunc( + mono_predictions[f"{prefix}_prediction"], + valid, + tel_to_array_indices, + multiplicity, + weights=weights, ) else: stereo_predictions = np.full(n_array_events, np.nan) @@ -328,29 +297,20 @@ def predict_table(self, mono_predictions: Table) -> Table: stereo_table[f"{self.prefix}_goodness_of_fit"] = np.nan elif self.property is ReconstructionProperty.ENERGY: - if len(valid_predictions) > 0: - mono_energies = valid_predictions[f"{prefix}_energy"].quantity.to_value( + if np.count_nonzero(valid) > 0: + mono_energies = mono_predictions[f"{prefix}_energy"].quantity.to_value( u.TeV ) - if self.log_target: mono_energies = 
np.log(mono_energies)
 
-                stereo_energy = _weighted_mean_ufunc(
+                stereo_energy, std = weighted_mean_std_ufunc(
                     mono_energies,
-                    weights,
-                    n_array_events,
-                    indices[valid],
-                )
-                variance = _weighted_mean_ufunc(
-                    (mono_energies - np.repeat(stereo_energy, multiplicity)[valid])
-                    ** 2,
-                    weights,
-                    n_array_events,
-                    indices[valid],
+                    valid,
+                    tel_to_array_indices,
+                    multiplicity,
+                    weights=weights,
                 )
-                std = np.sqrt(variance)
-
                 if self.log_target:
                     stereo_energy = np.exp(stereo_energy)
                     std = np.exp(std)
@@ -369,22 +329,34 @@ def predict_table(self, mono_predictions: Table) -> Table:
             stereo_table[f"{self.prefix}_goodness_of_fit"] = np.nan
 
         elif self.property is ReconstructionProperty.GEOMETRY:
-            if len(valid_predictions) > 0:
+            if np.count_nonzero(valid) > 0:
                 coord = AltAz(
-                    alt=valid_predictions[f"{prefix}_alt"],
-                    az=valid_predictions[f"{prefix}_az"],
+                    alt=mono_predictions[f"{prefix}_alt"],
+                    az=mono_predictions[f"{prefix}_az"],
                 )
                 # https://en.wikipedia.org/wiki/Von_Mises%E2%80%93Fisher_distribution#Mean_direction
                 mono_x, mono_y, mono_z = coord.cartesian.get_xyz()
 
-                stereo_x = _weighted_mean_ufunc(
-                    mono_x, weights, n_array_events, indices[valid]
+                stereo_x, _ = weighted_mean_std_ufunc(
+                    mono_x,
+                    valid,
+                    tel_to_array_indices,
+                    multiplicity,
+                    weights=weights,
                 )
-                stereo_y = _weighted_mean_ufunc(
-                    mono_y, weights, n_array_events, indices[valid]
+                stereo_y, _ = weighted_mean_std_ufunc(
+                    mono_y,
+                    valid,
+                    tel_to_array_indices,
+                    multiplicity,
+                    weights=weights,
                 )
-                stereo_z = _weighted_mean_ufunc(
-                    mono_z, weights, n_array_events, indices[valid]
+                stereo_z, _ = weighted_mean_std_ufunc(
+                    mono_z,
+                    valid,
+                    tel_to_array_indices,
+                    multiplicity,
+                    weights=weights,
                 )
 
                 mean_cartesian = CartesianRepresentation(
@@ -425,7 +397,9 @@ def predict_table(self, mono_predictions: Table) -> Table:
 
         tel_ids = [[] for _ in range(n_array_events)]
 
-        for index, tel_id in zip(indices[valid], valid_predictions["tel_id"]):
+        for index, tel_id in zip(
+            tel_to_array_indices[valid], mono_predictions["tel_id"][valid]
+        ):
             tel_ids[index].append(tel_id)
 
         stereo_table[f"{self.prefix}_telescopes"] = tel_ids
diff --git a/src/ctapipe/reco/telescope_event_handling.py b/src/ctapipe/reco/telescope_event_handling.py
new file mode 100644
index 00000000000..5e9da9621e8
--- /dev/null
+++ b/src/ctapipe/reco/telescope_event_handling.py
@@ -0,0 +1,146 @@
+"""Helper functions for array-event-wise aggregation of telescope events."""
+
+import numpy as np
+from numba import njit, uint64
+
+__all__ = ["get_subarray_index", "weighted_mean_std_ufunc"]
+
+
+@njit
+def _get_subarray_index(obs_ids, event_ids):
+    n_tel_events = len(obs_ids)
+    idx = np.zeros(n_tel_events, dtype=uint64)
+    current_idx = 0
+    subarray_obs_index = []
+    subarray_event_index = []
+    multiplicities = []
+    multiplicity = 0
+
+    if n_tel_events > 0:
+        subarray_obs_index.append(obs_ids[0])
+        subarray_event_index.append(event_ids[0])
+        multiplicity += 1
+
+    for i in range(1, n_tel_events):
+        if obs_ids[i] != obs_ids[i - 1] or event_ids[i] != event_ids[i - 1]:
+            # append to subarray events
+            multiplicities.append(multiplicity)
+            subarray_obs_index.append(obs_ids[i])
+            subarray_event_index.append(event_ids[i])
+            # reset state
+            current_idx += 1
+            multiplicity = 0
+
+        multiplicity += 1
+        idx[i] = current_idx
+
+    # Append multiplicity of the last subarray event
+    if n_tel_events > 0:
+        multiplicities.append(multiplicity)
+
+    return (
+        np.asarray(subarray_obs_index),
+        np.asarray(subarray_event_index),
+        np.asarray(multiplicities),
+        idx,
+    )
+
+
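+# A usage sketch for the wrapper below (hypothetical 5-row table, values
+# chosen only to illustrate the blockwise grouping):
+#
+#     tel_events = Table({"obs_id": [1, 1, 1, 1, 2],
+#                         "event_id": [1, 1, 2, 2, 1]})
+#     obs_ids, event_ids, multiplicity, idx = get_subarray_index(tel_events)
+#     # obs_ids      -> [1, 1, 2]
+#     # event_ids    -> [1, 2, 1]
+#     # multiplicity -> [2, 2, 1]
+#     # idx          -> [0, 0, 1, 1, 2]  (subarray event index per telescope event)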
+def get_subarray_index(tel_table):
+    """
+    Get the subarray-event-wise information from a table of telescope events.
+
+    Extract obs_ids and event_ids of all subarray events contained
+    in a table of telescope events, their multiplicity and an array
+    giving the index of the subarray event for each telescope event.
+
+    This requires the telescope events of one subarray event to come
+    in a single block.
+
+    Parameters
+    ----------
+    tel_table: astropy.table.Table
+        table with telescope events as rows
+
+    Returns
+    -------
+    Tuple(np.ndarray, np.ndarray, np.ndarray, np.ndarray)
+        obs_ids of subarray events, event_ids of subarray events,
+        multiplicity of subarray events, index of the subarray event
+        for each telescope event
+    """
+    obs_idx = tel_table["obs_id"]
+    event_idx = tel_table["event_id"]
+    return _get_subarray_index(obs_idx, event_idx)
+
+
+def _grouped_add(tel_data, n_array_events, indices):
+    """
+    Calculate the group-wise sum for each array event over the
+    corresponding telescope events. ``indices`` is an array
+    that gives the index of the subarray event for each telescope event.
+    """
+    combined_values = np.zeros(n_array_events)
+    np.add.at(combined_values, indices, tel_data)
+    return combined_values
+
+
+def weighted_mean_std_ufunc(
+    tel_values,
+    valid_tel,
+    indices,
+    multiplicity,
+    weights=np.array([1]),
+):
+    """
+    Calculate the weighted mean and standard deviation for each array event
+    over the corresponding telescope events.
+
+    Parameters
+    ----------
+    tel_values: np.ndarray
+        values for each telescope event
+    valid_tel: array-like
+        boolean mask giving the valid values of ``tel_values``
+    indices: np.ndarray
+        index of the subarray event for each telescope event, returned as
+        the fourth return value of ``get_subarray_index``
+    multiplicity: np.ndarray
+        multiplicity of the subarray events in the same order as the order of
+        subarray events in ``indices``
+    weights: np.ndarray
+        weights used for averaging (equal/no weights are used by default)
+
+    Returns
+    -------
+    Tuple(np.ndarray, np.ndarray)
+        weighted mean and standard deviation for each array event
+    """
+    n_array_events = len(multiplicity)
+    # avoid numerical problems caused by very large or small weights
+    weights = weights / weights.max()
+    tel_values = tel_values[valid_tel]
+    indices = indices[valid_tel]
+
+    sum_prediction = _grouped_add(
+        tel_values * weights,
+        n_array_events,
+        indices,
+    )
+    sum_of_weights = _grouped_add(
+        weights,
+        n_array_events,
+        indices,
+    )
+    mean = np.full(n_array_events, np.nan)
+    valid = sum_of_weights > 0
+    mean[valid] = sum_prediction[valid] / sum_of_weights[valid]
+
+    sum_sq_residuals = _grouped_add(
+        (tel_values - np.repeat(mean, multiplicity)[valid_tel]) ** 2 * weights,
+        n_array_events,
+        indices,
+    )
+    variance = np.full(n_array_events, np.nan)
+    variance[valid] = sum_sq_residuals[valid] / sum_of_weights[valid]
+    return mean, np.sqrt(variance)
diff --git a/src/ctapipe/reco/tests/test_stereo_combination.py b/src/ctapipe/reco/tests/test_stereo_combination.py
index 7187262af27..63ced2a980c 100644
--- a/src/ctapipe/reco/tests/test_stereo_combination.py
+++ b/src/ctapipe/reco/tests/test_stereo_combination.py
@@ -84,8 +84,18 @@ def test_predict_mean_energy(weights, mono_table):
     assert_array_equal(stereo["event_id"], np.array([1, 2, 1]))
     if weights == "intensity":
         assert_array_equal(stereo["dummy_energy"], [7, 0.5, np.nan] * u.TeV)
+        assert_allclose(
+            stereo["dummy_energy_uncert"].quantity,
+            [4.242641, 0, np.nan] * u.TeV,
+            atol=1e-7,
+        )
     elif weights == "none":
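+        # "dummy_energy_uncert" is the weighted population standard deviation
+        # (ddof=0) computed by weighted_mean_std_ufunc; with "none" weights it
+        # reduces to a plain per-event np.std of the mono energies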
assert_array_equal(stereo["dummy_energy"], [5, 0.5, np.nan] * u.TeV) + assert_allclose( + stereo["dummy_energy_uncert"].quantity, + [3.741657, 0, np.nan] * u.TeV, + atol=1e-7, + ) assert_array_equal(stereo["dummy_telescopes"][0], np.array([1, 2, 3])) assert_array_equal(stereo["dummy_telescopes"][1], 5) diff --git a/src/ctapipe/reco/tests/test_telescope_event_handling.py b/src/ctapipe/reco/tests/test_telescope_event_handling.py new file mode 100644 index 00000000000..1faa0f4f65c --- /dev/null +++ b/src/ctapipe/reco/tests/test_telescope_event_handling.py @@ -0,0 +1,51 @@ +import numpy as np +from astropy.table import Table + +from ctapipe.io import TableLoader, read_table +from ctapipe.io.tests.test_table_loader import check_equal_array_event_order + + +def test_get_subarray_index(dl1_parameters_file): + from ctapipe.reco.telescope_event_handling import get_subarray_index + + opts = dict(simulated=False, true_parameters=False, dl2=False, pointing=False) + with TableLoader(dl1_parameters_file, **opts) as loader: + tel_events = loader.read_telescope_events() + + subarray_obs_ids, subarray_event_ids, _, _ = get_subarray_index(tel_events) + trigger = read_table(dl1_parameters_file, "/dl1/event/subarray/trigger") + + assert len(subarray_obs_ids) == len(subarray_event_ids) + assert len(subarray_obs_ids) == len(trigger) + check_equal_array_event_order( + Table({"obs_id": subarray_obs_ids, "event_id": subarray_event_ids}), trigger + ) + + +def test_mean_std_ufunc(dl1_parameters_file): + from ctapipe.reco.telescope_event_handling import ( + get_subarray_index, + weighted_mean_std_ufunc, + ) + + opts = dict(simulated=False, true_parameters=False, dl2=False, pointing=False) + with TableLoader(dl1_parameters_file, **opts) as loader: + tel_events = loader.read_telescope_events() + + valid = np.isfinite(tel_events["hillas_length"]) + + _, _, multiplicity, tel_to_subarray_idx = get_subarray_index(tel_events) + + # test only default uniform weights, + # other weights are tested in test_stereo_combination + mean, std = weighted_mean_std_ufunc( + tel_events["hillas_length"], valid, tel_to_subarray_idx, multiplicity + ) + + # check if result is identical with np.mean/np.std + grouped = tel_events.group_by(["obs_id", "event_id"]) + true_mean = grouped["hillas_length"].groups.aggregate(np.nanmean) + true_std = grouped["hillas_length"].groups.aggregate(np.nanstd) + + assert np.allclose(mean, true_mean, equal_nan=True) + assert np.allclose(std, true_std, equal_nan=True) diff --git a/src/ctapipe/tools/apply_models.py b/src/ctapipe/tools/apply_models.py index 6ec237006e0..3caff55fed1 100644 --- a/src/ctapipe/tools/apply_models.py +++ b/src/ctapipe/tools/apply_models.py @@ -1,6 +1,7 @@ """ Tool to apply machine learning models in bulk (as opposed to event by event). 
""" + import numpy as np import tables from astropy.table import Table, join, vstack @@ -9,9 +10,8 @@ from ctapipe.core.tool import Tool from ctapipe.core.traits import Bool, Integer, List, Path, classes_with_traits, flag from ctapipe.io import HDF5Merger, TableLoader, write_table -from ctapipe.io.astropy_helpers import read_table +from ctapipe.io.astropy_helpers import join_allow_empty, read_table from ctapipe.io.tableio import TelListToMaskTransform -from ctapipe.io.tableloader import _join_subarray_events from ctapipe.reco import Reconstructor __all__ = [ @@ -218,6 +218,23 @@ def _apply(self, reconstructor, tel_tables, start, stop): def _combine(self, reconstructor, tel_tables, start, stop): stereo_table = vstack(list(tel_tables.values())) + # stacking the single telescope tables and joining + # potentially changes the order of the subarray events. + # to ensure events are stored in the correct order, + # we resort to trigger table order + trigger = read_table( + self.h5file, "/dl1/event/subarray/trigger", start=start, stop=stop + )[["obs_id", "event_id"]] + trigger["__sort_index__"] = np.arange(len(trigger)) + + stereo_table = join_allow_empty( + stereo_table, + trigger, + keys=["obs_id", "event_id"], + join_type="left", + ) + stereo_table.sort("__sort_index__") + combiner = reconstructor.stereo_combiner stereo_predictions = combiner.predict_table(stereo_table) del stereo_table @@ -230,19 +247,6 @@ def _combine(self, reconstructor, tel_tables, start, stop): stereo_predictions[c.name] = np.array([trafo(r) for r in c]) stereo_predictions[c.name].description = c.description - # stacking the single telescope tables and joining - # potentially changes the order of the subarray events. - # to ensure events are stored in the correct order, - # we resort to trigger table order - trigger = read_table( - self.h5file, "/dl1/event/subarray/trigger", start=start, stop=stop - )[["obs_id", "event_id"]] - trigger["__sort_index__"] = np.arange(len(trigger)) - - stereo_predictions = _join_subarray_events(trigger, stereo_predictions) - stereo_predictions.sort("__sort_index__") - del stereo_predictions["__sort_index__"] - write_table( stereo_predictions, self.output_path, diff --git a/src/ctapipe/tools/tests/test_process.py b/src/ctapipe/tools/tests/test_process.py index 49e70144602..424e10d1b9d 100644 --- a/src/ctapipe/tools/tests/test_process.py +++ b/src/ctapipe/tools/tests/test_process.py @@ -3,6 +3,7 @@ Test ctapipe-process on a few different use cases """ +import json from subprocess import CalledProcessError import astropy.units as u @@ -160,17 +161,20 @@ def test_stage1_datalevels(tmp_path): assert isinstance(tool.event_source, DummyEventSource) -def test_stage_2_from_simtel(tmp_path): +def test_stage_2_from_simtel(tmp_path, provenance): """check we can go to DL2 geometry from simtel file""" config = resource_file("stage2_config.json") output = tmp_path / "test_stage2_from_simtel.DL2.h5" + provenance_log = tmp_path / "provenance.log" + input_path = get_dataset_path("gamma_prod5.simtel.zst") run_tool( ProcessorTool(), argv=[ f"--config={config}", - "--input=dataset://gamma_prod5.simtel.zst", + f"--input={input_path}", f"--output={output}", + f"--provenance-log={provenance_log}", "--overwrite", ], cwd=tmp_path, @@ -190,6 +194,18 @@ def test_stage_2_from_simtel(tmp_path): assert dl2["HillasReconstructor_telescopes"].dtype == np.bool_ assert dl2["HillasReconstructor_telescopes"].shape[1] == len(subarray) + activities = json.loads(provenance_log.read_text()) + assert len(activities) == 1 + + activity = 
+    assert activity["status"] == "completed"
+    assert len(activity["input"]) == 2
+    assert activity["input"][0]["url"] == str(config)
+    assert activity["input"][1]["url"] == str(input_path)
+
+    assert len(activity["output"]) == 1
+    assert activity["output"][0]["url"] == str(output)
+
 
 def test_stage_2_from_dl1_images(tmp_path, dl1_image_file):
     """check we can go to DL2 geometry from DL1 images"""
@@ -535,3 +551,36 @@ def test_on_old_file(input_url, args, tmp_path):
     np.testing.assert_array_equal(
         finite_reco, events["HillasReconstructor_is_valid"]
     )
+
+
+def test_prod6_issues(tmp_path):
+    """Test behavior of the source on a prod6 file, see issues #2344 and #2660"""
+    input_url = "dataset://prod6_issues.simtel.zst"
+    output_path = tmp_path / "test.dl1.h5"
+
+    run_tool(
+        ProcessorTool(),
+        argv=[
+            f"--input={input_url}",
+            f"--output={output_path}",
+            "--write-images",
+            "--write-showers",
+            "--overwrite",
+        ],
+        cwd=tmp_path,
+        raises=True,
+    )
+
+    with TableLoader(output_path) as loader:
+        tel_events = loader.read_telescope_events()
+        subarray_events = loader.read_subarray_events()
+
+        trigger_counts = np.count_nonzero(subarray_events["tels_with_trigger"], axis=0)
+        _, tel_event_counts = np.unique(tel_events["tel_id"], return_counts=True)
+
+        mask = trigger_counts > 0
+        np.testing.assert_equal(trigger_counts[mask], tel_event_counts)
+
+        images = loader.read_telescope_events([32], true_images=True)
+        images.add_index("event_id")
+        np.testing.assert_array_equal(images.loc[1664106]["true_image"], -1)
diff --git a/src/ctapipe/utils/datasets.py b/src/ctapipe/utils/datasets.py
index 03192668cc6..cb646f7b2d1 100644
--- a/src/ctapipe/utils/datasets.py
+++ b/src/ctapipe/utils/datasets.py
@@ -35,7 +35,7 @@
 
 #: default base URL for downloading datasets
-DEFAULT_URL = "http://cccta-dataserver.in2p3.fr/data/ctapipe-test-data/v1.1.0/"
+DEFAULT_URL = "https://minio-cta.zeuthen.desy.de/dpps-testdata-public/data/ctapipe-test-data/v1.1.0/"
 
 
 def get_default_url():
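
The two helpers added in ``src/ctapipe/reco/telescope_event_handling.py`` above are designed to be chained: ``get_subarray_index`` reduces the telescope-event table to subarray events, and its last two return values feed directly into ``weighted_mean_std_ufunc``. A minimal sketch of that call pattern; the toy table and its ``energy`` column are illustrative inventions, not taken from the patch.

    import numpy as np
    from astropy.table import Table

    from ctapipe.reco.telescope_event_handling import (
        get_subarray_index,
        weighted_mean_std_ufunc,
    )

    # two subarray events, each stored as one contiguous block of rows,
    # with multiplicities 3 and 2
    tel_events = Table(
        {
            "obs_id": [1, 1, 1, 1, 1],
            "event_id": [1, 1, 1, 2, 2],
            "energy": [1.0, 2.0, 3.0, 4.0, 6.0],
        }
    )

    obs_ids, event_ids, multiplicity, tel_to_subarray_idx = get_subarray_index(
        tel_events
    )
    # obs_ids -> [1 1], event_ids -> [1 2]
    # multiplicity -> [3 2], tel_to_subarray_idx -> [0 0 0 1 1]

    valid = np.isfinite(tel_events["energy"])
    mean, std = weighted_mean_std_ufunc(
        tel_events["energy"], valid, tel_to_subarray_idx, multiplicity
    )
    # mean -> [2. 5.], std -> [0.8165 1.]

With the default uniform weights this yields the per-event population mean and standard deviation, which is why the new test compares against ``np.nanmean`` and ``np.nanstd``.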
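
``_grouped_add`` deliberately uses ``np.add.at`` instead of fancy-indexed in-place addition: the buffered ``+=`` keeps only one contribution per duplicated index, while the unbuffered ``at`` variant accumulates all of them. A short sketch with made-up values:

    import numpy as np

    values = np.array([1.0, 2.0, 3.0])
    indices = np.array([0, 0, 1])  # group 0 receives two contributions

    buffered = np.zeros(2)
    buffered[indices] += values  # -> [2. 3.], the 1.0 aimed at group 0 is lost

    unbuffered = np.zeros(2)
    np.add.at(unbuffered, indices, values)  # -> [3. 3.], both hits on group 0 counted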
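
For the GEOMETRY case, the combiner averages the cartesian unit vectors of the mono alt/az predictions, following the von Mises-Fisher mean-direction reference kept in the diff. The conversion of ``mean_cartesian`` back to angles lies outside this excerpt, so the last two lines below are an assumption about how it can be done with plain astropy, and the input directions are arbitrary examples.

    import astropy.units as u
    from astropy.coordinates import (
        AltAz,
        CartesianRepresentation,
        UnitSphericalRepresentation,
    )

    mono = AltAz(alt=[70, 72] * u.deg, az=[10, 14] * u.deg)
    x, y, z = mono.cartesian.get_xyz()

    # unweighted mean direction; the patch uses weighted_mean_std_ufunc here
    mean_cartesian = CartesianRepresentation(x.mean(), y.mean(), z.mean())
    mean_spherical = mean_cartesian.represent_as(UnitSphericalRepresentation)
    # mean_spherical.lat is the combined alt, mean_spherical.lon the combined az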