From 81387b4c0f83ef6d004a1d63ee3037858fd91aff Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 27 Nov 2024 09:38:00 +0100 Subject: [PATCH 1/9] Fix error in ctapipe-process in case telescope event is missing true image --- docs/changes/2659.bugfix.rst | 10 ++++++++++ src/ctapipe/io/simteleventsource.py | 13 +++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 docs/changes/2659.bugfix.rst diff --git a/docs/changes/2659.bugfix.rst b/docs/changes/2659.bugfix.rst new file mode 100644 index 00000000000..775f5724be7 --- /dev/null +++ b/docs/changes/2659.bugfix.rst @@ -0,0 +1,10 @@ +Fix error in ``ctapipe-process`` when in the middle of a simtel file +that has true images available, a telescope event is missing the true image. +This can happen rarely in case a telescope triggered on pure NSB or +is oversaturated to the point where the true pe didn't fit into memory constraints. + +The error was due to the ``DataWriter`` trying to write a ``None`` into an +already setup table for the true images. + +The ``SimTelEventSource`` will no create an invalid true image filled with ``-1`` +for such events. diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 45d35b03b51..5bc1c316f97 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -570,6 +570,7 @@ def __init__(self, input_url=Undefined, config=None, parent=None, **kwargs): self.file_, kind=self.atmosphere_profile_choice ) + self._has_true_image = None self.log.debug(f"Using gain selector {self.gain_selector}") def __exit__(self, exc_type, exc_val, exc_tb): @@ -872,6 +873,18 @@ def _generate_events(self): .get("photoelectrons", None) ) + if self._has_true_image is None: + self._has_true_image = true_image is not None + + if self._has_true_image and true_image is None: + self.log.warning( + "Encountered telescope event with missing true_image in" + "file that has true images: event_id = %d, tel_id = %d", + event_id, + tel_id, + ) + true_image = np.full(n_pixels, -1, dtype=np.int32) + if data.simulation is not None: if data.simulation.shower is not None: impact_container = TelescopeImpactParameterContainer( From cfadbabb0a3bb53f0e8708a3c669fd96d26491a0 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 28 Nov 2024 13:19:49 +0100 Subject: [PATCH 2/9] Mention table lengths in mismatch error --- src/ctapipe/io/tableloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/io/tableloader.py b/src/ctapipe/io/tableloader.py index 2b10ad96506..613b7c66907 100644 --- a/src/ctapipe/io/tableloader.py +++ b/src/ctapipe/io/tableloader.py @@ -125,7 +125,7 @@ def _join_telescope_events(table1, table2): def _merge_table_same_index(table1, table2, index_keys, fallback_join_type="left"): """Merge two tables assuming their primary keys are identical""" if len(table1) != len(table2): - raise ValueError("Tables must have identical length") + raise ValueError(f"Tables must have identical length, {len(table1)} != {len(table2)}") if len(table1) == 0: return table1 From 5456408593f4ebfb7d8b15865e59e2ab99c7d7fb Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 28 Nov 2024 13:29:57 +0100 Subject: [PATCH 3/9] Distinguish bright and dim cases for missing true images --- src/ctapipe/io/simteleventsource.py | 34 ++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 5bc1c316f97..2d729fba468 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -83,6 +83,11 @@ "MirrorClass", ] +# default for MAX_PHOTOELECTRONS in simtel_array is 2_500_000 +# so this should be a safe limit to distinguish "dim image true missing" +# from "extremely bright true image missing", see issue #2344 +MISSING_IMAGE_BRIGHTNESS_LIMIT = 1_000_000 + # Mapping of SimTelArray Calibration trigger types to EventType: # from simtelarray: type Dark (0), pedestal (1), in-lid LED (2) or laser/LED (3+) data. SIMTEL_TO_CTA_EVENT_TYPE = { @@ -872,18 +877,29 @@ def _generate_events(self): .get(tel_id - 1, {}) .get("photoelectrons", None) ) + true_image_sum = true_image_sums[self.telescope_indices_original[tel_id]] if self._has_true_image is None: self._has_true_image = true_image is not None if self._has_true_image and true_image is None: - self.log.warning( - "Encountered telescope event with missing true_image in" - "file that has true images: event_id = %d, tel_id = %d", - event_id, - tel_id, - ) - true_image = np.full(n_pixels, -1, dtype=np.int32) + + if true_image_sum > MISSING_IMAGE_BRIGHTNESS_LIMIT: + self.log.warning( + "Encountered extremely bright telescope event with missing true_image in" + "file that has true images: event_id = %d, tel_id = %d." + "event might be truncated, skipping telescope event", + event_id, + tel_id, + ) + continue + else: + self.log.info( + "telescope event event_id = %d, tel_id = %d is missing true image", + event_id, + tel_id, + ) + true_image = np.full(n_pixels, -1, dtype=np.int32) if data.simulation is not None: if data.simulation.shower is not None: @@ -900,9 +916,7 @@ def _generate_events(self): ) data.simulation.tel[tel_id] = SimulatedCameraContainer( - true_image_sum=true_image_sums[ - self.telescope_indices_original[tel_id] - ], + true_image_sum=true_image_sum, true_image=true_image, impact=impact_container, ) From 7f4376f5a5aa9e8037500dc77342e72481a36cd6 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 28 Nov 2024 13:42:06 +0100 Subject: [PATCH 4/9] Skip telescope events not present in trigger information --- src/ctapipe/io/simteleventsource.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 2d729fba468..7deff1adfa2 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -867,6 +867,15 @@ def _generate_events(self): impact_distances = np.full(len(self.subarray), np.nan) * u.m for tel_id, telescope_event in telescope_events.items(): + if tel_id not in trigger.tels_with_trigger: + self.log.warning( + "Encountered telescope event not present in" + " stereo trigger information, skipping." + " event_id = %d, tel_id = %d, tels_with_trigger: %s", + event_id, tel_id, trigger.tels_with_trigger, + ) + continue + adc_samples = telescope_event.get("adc_samples") if adc_samples is None: adc_samples = telescope_event["adc_sums"][:, :, np.newaxis] From 0a93bc9fc007f05e045e614defbb8ca4778b3cd1 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 28 Nov 2024 15:26:26 +0100 Subject: [PATCH 5/9] Fix two remaining open file warnings in tests --- src/ctapipe/io/tests/test_datawriter.py | 19 ++++++++++--------- src/ctapipe/io/tests/test_hdf5.py | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/ctapipe/io/tests/test_datawriter.py b/src/ctapipe/io/tests/test_datawriter.py index c42dcca6858..f4924b4c3dd 100644 --- a/src/ctapipe/io/tests/test_datawriter.py +++ b/src/ctapipe/io/tests/test_datawriter.py @@ -201,15 +201,16 @@ def test_roundtrip(tmpdir: Path): # make sure it is readable by the event source and matches the images - for event in EventSource(output_path): - for tel_id, dl1 in event.dl1.tel.items(): - original_image = events[event.count].dl1.tel[tel_id].image - read_image = dl1.image - assert np.allclose(original_image, read_image, atol=0.1) - - original_peaktime = events[event.count].dl1.tel[tel_id].peak_time - read_peaktime = dl1.peak_time - assert np.allclose(original_peaktime, read_peaktime, atol=0.01) + with EventSource(output_path) as source: + for event in source: + for tel_id, dl1 in event.dl1.tel.items(): + original_image = events[event.count].dl1.tel[tel_id].image + read_image = dl1.image + assert np.allclose(original_image, read_image, atol=0.1) + + original_peaktime = events[event.count].dl1.tel[tel_id].peak_time + read_peaktime = dl1.peak_time + assert np.allclose(original_peaktime, read_peaktime, atol=0.01) def test_dl1writer_no_events(tmpdir: Path): diff --git a/src/ctapipe/io/tests/test_hdf5.py b/src/ctapipe/io/tests/test_hdf5.py index 681567068a6..65b9e9088da 100644 --- a/src/ctapipe/io/tests/test_hdf5.py +++ b/src/ctapipe/io/tests/test_hdf5.py @@ -988,19 +988,19 @@ class Container1(Container): # But when explicitly giving the prefixes, this works and order # should not be important - reader = HDF5TableReader(path) - generator = reader.read( - "/values", - [Container1, Container1], - prefixes=["bar", "foo"], - ) + with HDF5TableReader(path) as reader: + generator = reader.read( + "/values", + [Container1, Container1], + prefixes=["bar", "foo"], + ) - for value in (1, 2, 3): - c1, c2 = next(generator) - assert c1.value == 5 * value - assert c1.prefix == "bar" - assert c2.value == value - assert c2.prefix == "foo" + for value in (1, 2, 3): + c1, c2 = next(generator) + assert c1.value == 5 * value + assert c1.prefix == "bar" + assert c2.value == value + assert c2.prefix == "foo" @pytest.mark.parametrize("input_type", (str, Path, tables.File)) From 9f818e8fad551319de45bf1c87a91caa248bef0d Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 28 Nov 2024 15:47:04 +0100 Subject: [PATCH 6/9] Fix pre-commit --- src/ctapipe/io/simteleventsource.py | 9 ++++++--- src/ctapipe/io/tableloader.py | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 7deff1adfa2..887170b4c25 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -872,7 +872,9 @@ def _generate_events(self): "Encountered telescope event not present in" " stereo trigger information, skipping." " event_id = %d, tel_id = %d, tels_with_trigger: %s", - event_id, tel_id, trigger.tels_with_trigger, + event_id, + tel_id, + trigger.tels_with_trigger, ) continue @@ -886,13 +888,14 @@ def _generate_events(self): .get(tel_id - 1, {}) .get("photoelectrons", None) ) - true_image_sum = true_image_sums[self.telescope_indices_original[tel_id]] + true_image_sum = true_image_sums[ + self.telescope_indices_original[tel_id] + ] if self._has_true_image is None: self._has_true_image = true_image is not None if self._has_true_image and true_image is None: - if true_image_sum > MISSING_IMAGE_BRIGHTNESS_LIMIT: self.log.warning( "Encountered extremely bright telescope event with missing true_image in" diff --git a/src/ctapipe/io/tableloader.py b/src/ctapipe/io/tableloader.py index 613b7c66907..041f68f5151 100644 --- a/src/ctapipe/io/tableloader.py +++ b/src/ctapipe/io/tableloader.py @@ -125,7 +125,9 @@ def _join_telescope_events(table1, table2): def _merge_table_same_index(table1, table2, index_keys, fallback_join_type="left"): """Merge two tables assuming their primary keys are identical""" if len(table1) != len(table2): - raise ValueError(f"Tables must have identical length, {len(table1)} != {len(table2)}") + raise ValueError( + f"Tables must have identical length, {len(table1)} != {len(table2)}" + ) if len(table1) == 0: return table1 From e906066910cea9dde6ed622556f69b7b33c70df5 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Fri, 29 Nov 2024 18:06:53 +0100 Subject: [PATCH 7/9] Fix typo --- docs/changes/2659.bugfix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changes/2659.bugfix.rst b/docs/changes/2659.bugfix.rst index 775f5724be7..1d82b35517a 100644 --- a/docs/changes/2659.bugfix.rst +++ b/docs/changes/2659.bugfix.rst @@ -6,5 +6,5 @@ is oversaturated to the point where the true pe didn't fit into memory constrain The error was due to the ``DataWriter`` trying to write a ``None`` into an already setup table for the true images. -The ``SimTelEventSource`` will no create an invalid true image filled with ``-1`` +The ``SimTelEventSource`` will now create an invalid true image filled with ``-1`` for such events. From ae6abe1d6d0837c09675c5575cabf774cd9953bc Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Mon, 2 Dec 2024 12:32:53 +0100 Subject: [PATCH 8/9] Add tests for prod6 issues using shrunken test file --- .../io/tests/test_simteleventsource.py | 35 +++++++++++++++++++ src/ctapipe/tools/tests/test_process.py | 33 +++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/src/ctapipe/io/tests/test_simteleventsource.py b/src/ctapipe/io/tests/test_simteleventsource.py index 7296fb7def5..a285160c056 100644 --- a/src/ctapipe/io/tests/test_simteleventsource.py +++ b/src/ctapipe/io/tests/test_simteleventsource.py @@ -653,3 +653,38 @@ def test_provenance(provenance, prod5_gamma_simtel_path): assert len(inputs) == 1 assert inputs[0]["url"] == str(prod5_gamma_simtel_path) assert inputs[0]["reference_meta"] is None + + +def test_prod6_issues(): + """Test behavior of source on file from prod6, see issues #2344 and #2660""" + input_url = "dataset://prod6_issues.simtel.zst" + + events_checked_trigger = set() + events_checked_image = set() + + # events with two telescope events but only one in stereo trigger in simtel + strange_trigger_events = { + 1548602: 3, + 2247909: 32, + 3974908: 2, + 4839806: 1, + } + missing_true_images = {1664106: 32} + + with SimTelEventSource(input_url) as source: + for e in source: + event_id = e.index.event_id + if event_id in strange_trigger_events: + expected_tel_id = strange_trigger_events[event_id] + np.testing.assert_equal(e.trigger.tels_with_trigger, [expected_tel_id]) + assert e.trigger.tel.keys() == {expected_tel_id} + assert e.r1.tel.keys() == {expected_tel_id} + events_checked_trigger.add(event_id) + + if event_id in missing_true_images: + tel_id = missing_true_images[event_id] + np.testing.assert_equal(e.simulation.tel[tel_id].true_image, -1) + events_checked_image.add(event_id) + + assert strange_trigger_events.keys() == events_checked_trigger + assert missing_true_images.keys() == events_checked_image diff --git a/src/ctapipe/tools/tests/test_process.py b/src/ctapipe/tools/tests/test_process.py index 49e70144602..24a4a4f4ef1 100644 --- a/src/ctapipe/tools/tests/test_process.py +++ b/src/ctapipe/tools/tests/test_process.py @@ -535,3 +535,36 @@ def test_on_old_file(input_url, args, tmp_path): np.testing.assert_array_equal( finite_reco, events["HillasReconstructor_is_valid"] ) + + +def test_prod6_issues(tmp_path): + """Test behavior of source on file from prod6, see issues #2344 and #2660""" + input_url = "dataset://prod6_issues.simtel.zst" + output_path = tmp_path / "test.dl1.h5" + + run_tool( + ProcessorTool(), + argv=[ + f"--input={input_url}", + f"--output={output_path}", + "--write-images", + "--write-showers", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + + with TableLoader(output_path) as loader: + tel_events = loader.read_telescope_events() + subarray_events = loader.read_subarray_events() + + trigger_counts = np.count_nonzero(subarray_events["tels_with_trigger"], axis=0) + _, tel_event_counts = np.unique(tel_events["tel_id"], return_counts=True) + + mask = trigger_counts > 0 + np.testing.assert_equal(trigger_counts[mask], tel_event_counts) + + images = loader.read_telescope_events([32], true_images=True) + images.add_index("event_id") + np.testing.assert_array_equal(images.loc[1664106]["true_image"], -1) From bca776194a1aa15f7dd6fc2caf40071eaeddfed1 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Tue, 3 Dec 2024 11:19:18 +0100 Subject: [PATCH 9/9] Add comment --- src/ctapipe/io/simteleventsource.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 887170b4c25..4c98e864aac 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -868,6 +868,9 @@ def _generate_events(self): for tel_id, telescope_event in telescope_events.items(): if tel_id not in trigger.tels_with_trigger: + # skip additional telescopes that have data but did not + # participate in the subarray trigger decision for this stereo event + # see #2660 for details self.log.warning( "Encountered telescope event not present in" " stereo trigger information, skipping."