From 01bcbd43ad3317214dc3801daa8ac457628413d8 Mon Sep 17 00:00:00 2001 From: Torben Schiz Date: Tue, 12 Mar 2024 16:30:10 +0200 Subject: [PATCH 1/6] Add first draft of simulation crash handling --- examples/python-dummy/micro_dummy.py | 7 +++++++ micro_manager/micro_manager.py | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/examples/python-dummy/micro_dummy.py b/examples/python-dummy/micro_dummy.py index eeed2984..aa3c213b 100644 --- a/examples/python-dummy/micro_dummy.py +++ b/examples/python-dummy/micro_dummy.py @@ -22,6 +22,13 @@ def solve(self, macro_data, dt): self._micro_scalar_data = macro_data["macro-scalar-data"] + 1 for d in range(self._dims): self._micro_vector_data.append(macro_data["macro-vector-data"][d] + 1) + + # random simulation crash + import numpy as np + if np.random.rand() < 0.01: + print("Micro simulation {} crashed!".format(self._sim_id)) + self._micro_scalar_data = 1 / 0 + return {"micro-scalar-data": self._micro_scalar_data.copy(), "micro-vector-data": self._micro_vector_data.copy()} diff --git a/micro_manager/micro_manager.py b/micro_manager/micro_manager.py index 28ca71a8..256f1f48 100644 --- a/micro_manager/micro_manager.py +++ b/micro_manager/micro_manager.py @@ -438,12 +438,23 @@ def _solve_micro_simulations(self, micro_sims_input: list) -> list: for count, sim in enumerate(self._micro_sims): start_time = time.time() - micro_sims_output[count] = sim.solve(micro_sims_input[count], self._dt) + try: + micro_sims_output[count] = sim.solve(micro_sims_input[count], self._dt) + except Exception as e: + _, mesh_vertex_coords = self._participant.get_mesh_vertex_ids_and_coordinates( + self._macro_mesh_name) + self._logger.error( + "Micro simulation with global ID {} at macro coordinates {} has experienced an error. Exiting simulation.".format( + sim.get_global_id(), mesh_vertex_coords[count])) + self._logger.error(e) + # set the micro simulation value to old value and keep it constant + micro_sims_output[count] = self._old_micro_sims_output[count] end_time = time.time() if self._is_micro_solve_time_required: micro_sims_output[count]["micro_sim_time"] = end_time - start_time - + self._old_micro_sims_output = micro_sims_output + return micro_sims_output def _solve_micro_simulations_with_adaptivity( From 48b7494e389c3d189f277da2f8adc88d9278b120 Mon Sep 17 00:00:00 2001 From: Torben Schiz Date: Tue, 19 Mar 2024 11:07:51 +0200 Subject: [PATCH 2/6] Extend handling of crashing simulations to adaptivity and corner cases --- examples/python-dummy/micro_dummy.py | 7 -- micro_manager/micro_manager.py | 97 +++++++++++++++++++++++----- tests/unit/test_micro_manager.py | 49 ++++++++++++-- 3 files changed, 122 insertions(+), 31 deletions(-) diff --git a/examples/python-dummy/micro_dummy.py b/examples/python-dummy/micro_dummy.py index aa3c213b..eeed2984 100644 --- a/examples/python-dummy/micro_dummy.py +++ b/examples/python-dummy/micro_dummy.py @@ -22,13 +22,6 @@ def solve(self, macro_data, dt): self._micro_scalar_data = macro_data["macro-scalar-data"] + 1 for d in range(self._dims): self._micro_vector_data.append(macro_data["macro-vector-data"][d] + 1) - - # random simulation crash - import numpy as np - if np.random.rand() < 0.01: - print("Micro simulation {} crashed!".format(self._sim_id)) - self._micro_scalar_data = 1 / 0 - return {"micro-scalar-data": self._micro_scalar_data.copy(), "micro-vector-data": self._micro_vector_data.copy()} diff --git a/micro_manager/micro_manager.py b/micro_manager/micro_manager.py index 256f1f48..f70be8c9 100644 --- a/micro_manager/micro_manager.py +++ b/micro_manager/micro_manager.py @@ -307,6 +307,9 @@ def _initialize(self) -> None: sim_id += 1 self._micro_sims = [None] * self._local_number_of_sims # DECLARATION + + self._crashed_sims = [False] * self._local_number_of_sims + self._old_micro_sims_output = [None] * self._local_number_of_sims micro_problem = getattr( __import__( @@ -437,22 +440,46 @@ def _solve_micro_simulations(self, micro_sims_input: list) -> list: micro_sims_output = [None] * self._local_number_of_sims for count, sim in enumerate(self._micro_sims): - start_time = time.time() - try: - micro_sims_output[count] = sim.solve(micro_sims_input[count], self._dt) - except Exception as e: - _, mesh_vertex_coords = self._participant.get_mesh_vertex_ids_and_coordinates( - self._macro_mesh_name) - self._logger.error( - "Micro simulation with global ID {} at macro coordinates {} has experienced an error. Exiting simulation.".format( - sim.get_global_id(), mesh_vertex_coords[count])) - self._logger.error(e) - # set the micro simulation value to old value and keep it constant + + if not self._crashed_sims[count]: + try: + start_time = time.time() + micro_sims_output[count] = sim.solve(micro_sims_input[count], self._dt) + end_time = time.time() + except Exception as error_message: + _, mesh_vertex_coords = self._participant.get_mesh_vertex_ids_and_coordinates( + self._macro_mesh_name) + self._logger.error( + "Micro simulation at macro coordinates {} has experienced an error. " + "See next entry for error message. " + "Keeping values constant at results of previous iteration".format( + mesh_vertex_coords[count])) + self._logger.error(error_message) + micro_sims_output[count] = self._old_micro_sims_output[count] + self._crashed_sims[count] = True + else: micro_sims_output[count] = self._old_micro_sims_output[count] - end_time = time.time() + + - if self._is_micro_solve_time_required: + if self._is_micro_solve_time_required and not self._crashed_sims[count]: micro_sims_output[count]["micro_sim_time"] = end_time - start_time + + crash_ratio = np.sum(self._crashed_sims) / len(self._crashed_sims) + if crash_ratio > 0.2: + self._logger.info("More than 20% of the micro simulations on rank {} have crashed. " + "Exiting simulation.".format(self._rank)) + sys.exit() + + set_sims = np.where(micro_sims_output) + none_mask = np.array([item is None for item in micro_sims_output]) + unset_sims = np.where(none_mask)[0] + + for unset_sims in unset_sims: + self._logger.info("Micro Sim {} has previously not run. " + "It will be replace with the output of the first " + "micro sim that ran {}".format(unset_sims, set_sims[0][0])) + micro_sims_output[unset_sims] = micro_sims_output[set_sims[0][0]] self._old_micro_sims_output = micro_sims_output return micro_sims_output @@ -494,17 +521,52 @@ def _solve_micro_simulations_with_adaptivity( # Solve all active micro simulations for active_id in active_sim_ids: - start_time = time.time() - micro_sims_output[active_id] = self._micro_sims[active_id].solve(micro_sims_input[active_id], self._dt) - end_time = time.time() + + if not self._crashed_sims[active_id]: + try: + start_time = time.time() + micro_sims_output[active_id] = self._micro_sims[active_id].solve(micro_sims_input[active_id], self._dt) + end_time = time.time() + except Exception as error_message: + _, mesh_vertex_coords = self._participant.get_mesh_vertex_ids_and_coordinates( + self._macro_mesh_name) + self._logger.error("Micro simulation at macro coordinates {} has experienced an error. " + "See next entry for error message. " + "Keeping values constant at results of previous iteration".format( + mesh_vertex_coords[active_id])) # Access the correct coordinates + self._logger.error(error_message) + # set the micro simulation value to old value and keep it constant if simulation crashes + micro_sims_output[active_id] = self._old_micro_sims_output[active_id] + self._crashed_sims[active_id] = True + else: + micro_sims_output[active_id] = self._old_micro_sims_output[active_id] + # Mark the micro sim as active for export micro_sims_output[active_id]["active_state"] = 1 micro_sims_output[active_id]["active_steps"] = self._micro_sims_active_steps[active_id] - if self._is_micro_solve_time_required: + if self._is_micro_solve_time_required and not self._crashed_sims[active_id]: micro_sims_output[active_id]["micro_sim_time"] = end_time - start_time + crash_ratio = np.sum(self._crashed_sims) / len(self._crashed_sims) + if crash_ratio > 0.2: + self._logger.info("More than 20% of the micro simulations on rank {} have crashed. " + "Exiting simulation.".format(self._rank)) + sys.exit() + + set_sims = np.where(micro_sims_output) + unset_sims = [] + for active_id in active_sim_ids: + if micro_sims_output[active_id] is None: + unset_sims.append(active_id) + + for unset_sims in unset_sims: + self._logger.info("Micro Sim {} has previously not run. " + "It will be replace with the output of the first " + "micro sim that ran {}".format(unset_sims, set_sims[0][0])) + micro_sims_output[unset_sims] = micro_sims_output[set_sims[0][0]] + # For each inactive simulation, copy data from most similar active simulation if self._adaptivity_type == "global": self._adaptivity_controller.communicate_micro_output(is_sim_active, sim_is_associated_to, micro_sims_output) @@ -525,6 +587,7 @@ def _solve_micro_simulations_with_adaptivity( for i in range(self._local_number_of_sims): for name in self._adaptivity_micro_data_names: self._data_for_adaptivity[name][i] = micro_sims_output[i][name] + self._old_micro_sims_output = micro_sims_output return micro_sims_output diff --git a/tests/unit/test_micro_manager.py b/tests/unit/test_micro_manager.py index ade67b32..75166333 100644 --- a/tests/unit/test_micro_manager.py +++ b/tests/unit/test_micro_manager.py @@ -5,18 +5,29 @@ class MicroSimulation: - def __init__(self, sim_id): + def __init__(self, sim_id, crashing=False): self.very_important_value = 0 + self.sim_id = sim_id + self.crashing = crashing + self.current_time = 0 + def initialize(self): pass def solve(self, macro_data, dt): - assert macro_data["macro-scalar-data"] == 1 - assert macro_data["macro-vector-data"].tolist() == [0, 1, 2] - return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, - "micro-vector-data": macro_data["macro-vector-data"] + 1} - + if not self.crashing: + assert macro_data["macro-scalar-data"] == 1 + assert macro_data["macro-vector-data"].tolist() == [0, 1, 2] + return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, + "micro-vector-data": macro_data["macro-vector-data"] + 1} + else: + if self.sim_id == 0: + self.current_time += dt + if self.current_time > dt: + raise Exception("Micro Simulation has crashed") + return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, + "micro-vector-data": macro_data["macro-vector-data"] + 1} class TestFunctioncalls(TestCase): def setUp(self): @@ -77,7 +88,7 @@ def test_read_write_data_from_precice(self): self.assertListEqual(data["macro-vector-data"].tolist(), fake_data["macro-vector-data"].tolist()) - def test_solve_mico_sims(self): + def test_solve_micro_sims(self): """ Test if the internal function _solve_micro_simulations works as expected. """ @@ -92,6 +103,30 @@ def test_solve_mico_sims(self): self.assertEqual(data["micro-scalar-data"], 2) self.assertListEqual(data["micro-vector-data"].tolist(), (fake_data["micro-vector-data"] + 1).tolist()) + def test_crash_handling(self): + """ + Test if the micro manager catches a simulation crash and handles it adequately. + """ + manager = micro_manager.MicroManager('micro-manager-config.json') + manager._local_number_of_sims = 4 + manager._micro_sims = [MicroSimulation(i, crashing = True) for i in range(4)] + manager._micro_sims_active_steps = np.zeros(4, dtype=np.int32) + # Momentarily, a simulation crash during the first step is not handled + micro_sims_output = manager._solve_micro_simulations(self.fake_read_data) + for i, data in enumerate(micro_sims_output): + self.fake_read_data[i]["macro-scalar-data"] = data["micro-scalar-data"] + self.fake_read_data[i]["macro-vector-data"] = data["micro-vector-data"] + micro_sims_output = manager._solve_micro_simulations(self.fake_read_data) + # The crashed simulation should have the same data as the previous step + data_crashed = micro_sims_output[0] + self.assertEqual(data_crashed["micro-scalar-data"], 2) + self.assertListEqual(data_crashed["micro-vector-data"].tolist(), + (self.fake_write_data[0]["micro-vector-data"] + 1).tolist()) + # Non-crashed simulations should have updated data + data_normal = micro_sims_output[1] + self.assertEqual(data_normal["micro-scalar-data"], 3) + self.assertListEqual(data_normal["micro-vector-data"].tolist(), + (self.fake_write_data[1]["micro-vector-data"] + 2).tolist()) def test_config(self): """ From 305f0bc8dccc4b8e8087cf44cbc6e184027eab5d Mon Sep 17 00:00:00 2001 From: Torben Schiz Date: Wed, 20 Mar 2024 11:27:42 +0200 Subject: [PATCH 3/6] Adapt format in crash handling to pep8 --- micro_manager/micro_manager.py | 51 +++++++++++++++------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/micro_manager/micro_manager.py b/micro_manager/micro_manager.py index f70be8c9..36feff8a 100644 --- a/micro_manager/micro_manager.py +++ b/micro_manager/micro_manager.py @@ -71,8 +71,6 @@ def __init__(self, config_file: str) -> None: self._rank, self._size) - micro_file_name = self._config.get_micro_file_name() - self._macro_mesh_name = self._config.get_macro_mesh_name() # Data names of data written to preCICE @@ -307,7 +305,7 @@ def _initialize(self) -> None: sim_id += 1 self._micro_sims = [None] * self._local_number_of_sims # DECLARATION - + self._crashed_sims = [False] * self._local_number_of_sims self._old_micro_sims_output = [None] * self._local_number_of_sims @@ -440,48 +438,45 @@ def _solve_micro_simulations(self, micro_sims_input: list) -> list: micro_sims_output = [None] * self._local_number_of_sims for count, sim in enumerate(self._micro_sims): - + if not self._crashed_sims[count]: try: start_time = time.time() - micro_sims_output[count] = sim.solve(micro_sims_input[count], self._dt) + micro_sims_output[count] = sim.solve( + micro_sims_input[count], self._dt) end_time = time.time() except Exception as error_message: _, mesh_vertex_coords = self._participant.get_mesh_vertex_ids_and_coordinates( - self._macro_mesh_name) - self._logger.error( - "Micro simulation at macro coordinates {} has experienced an error. " - "See next entry for error message. " - "Keeping values constant at results of previous iteration".format( - mesh_vertex_coords[count])) + self._macro_mesh_name) + self._logger.error("Micro simulation at macro coordinates {} has experienced an error. " + "See next entry for error message. " + "Keeping values constant at results of previous iteration".format( + mesh_vertex_coords[count])) self._logger.error(error_message) micro_sims_output[count] = self._old_micro_sims_output[count] self._crashed_sims[count] = True else: micro_sims_output[count] = self._old_micro_sims_output[count] - - - if self._is_micro_solve_time_required and not self._crashed_sims[count]: micro_sims_output[count]["micro_sim_time"] = end_time - start_time - + crash_ratio = np.sum(self._crashed_sims) / len(self._crashed_sims) if crash_ratio > 0.2: self._logger.info("More than 20% of the micro simulations on rank {} have crashed. " "Exiting simulation.".format(self._rank)) sys.exit() - + set_sims = np.where(micro_sims_output) none_mask = np.array([item is None for item in micro_sims_output]) unset_sims = np.where(none_mask)[0] - + for unset_sims in unset_sims: self._logger.info("Micro Sim {} has previously not run. " "It will be replace with the output of the first " "micro sim that ran {}".format(unset_sims, set_sims[0][0])) micro_sims_output[unset_sims] = micro_sims_output[set_sims[0][0]] self._old_micro_sims_output = micro_sims_output - + return micro_sims_output def _solve_micro_simulations_with_adaptivity( @@ -521,26 +516,26 @@ def _solve_micro_simulations_with_adaptivity( # Solve all active micro simulations for active_id in active_sim_ids: - + if not self._crashed_sims[active_id]: try: start_time = time.time() - micro_sims_output[active_id] = self._micro_sims[active_id].solve(micro_sims_input[active_id], self._dt) + micro_sims_output[active_id] = self._micro_sims[active_id].solve( + micro_sims_input[active_id], self._dt) end_time = time.time() except Exception as error_message: _, mesh_vertex_coords = self._participant.get_mesh_vertex_ids_and_coordinates( - self._macro_mesh_name) - self._logger.error("Micro simulation at macro coordinates {} has experienced an error. " - "See next entry for error message. " - "Keeping values constant at results of previous iteration".format( - mesh_vertex_coords[active_id])) # Access the correct coordinates + self._macro_mesh_name) + self._logger.error("Micro simulation at macro coordinates {} has experienced an error. " + "See next entry for error message. " + "Keeping values constant at results of previous iteration".format( + mesh_vertex_coords[active_id])) self._logger.error(error_message) # set the micro simulation value to old value and keep it constant if simulation crashes micro_sims_output[active_id] = self._old_micro_sims_output[active_id] self._crashed_sims[active_id] = True else: micro_sims_output[active_id] = self._old_micro_sims_output[active_id] - # Mark the micro sim as active for export micro_sims_output[active_id]["active_state"] = 1 @@ -554,13 +549,13 @@ def _solve_micro_simulations_with_adaptivity( self._logger.info("More than 20% of the micro simulations on rank {} have crashed. " "Exiting simulation.".format(self._rank)) sys.exit() - + set_sims = np.where(micro_sims_output) unset_sims = [] for active_id in active_sim_ids: if micro_sims_output[active_id] is None: unset_sims.append(active_id) - + for unset_sims in unset_sims: self._logger.info("Micro Sim {} has previously not run. " "It will be replace with the output of the first " From c8045516441827aaaacd03b5ae8cdd7307e5aa98 Mon Sep 17 00:00:00 2001 From: Torben Schiz Date: Thu, 21 Mar 2024 11:21:58 +0200 Subject: [PATCH 4/6] Add tests for simulation crashes --- tests/unit/micro-manager-config_crash.json | 25 +++++ tests/unit/test_micro_manager.py | 49 ++------- .../test_micro_simulation_crash_handling.py | 103 ++++++++++++++++++ 3 files changed, 135 insertions(+), 42 deletions(-) create mode 100644 tests/unit/micro-manager-config_crash.json create mode 100644 tests/unit/test_micro_simulation_crash_handling.py diff --git a/tests/unit/micro-manager-config_crash.json b/tests/unit/micro-manager-config_crash.json new file mode 100644 index 00000000..9ff06771 --- /dev/null +++ b/tests/unit/micro-manager-config_crash.json @@ -0,0 +1,25 @@ +{ + "micro_file_name": "test_micro_simulation_crash_handling", + "coupling_params": { + "config_file_name": "dummy-config.xml", + "macro_mesh_name": "dummy-macro-mesh", + "read_data_names": {"macro-scalar-data": "scalar", "macro-vector-data": "vector"}, + "write_data_names": {"micro-scalar-data": "scalar", "micro-vector-data": "vector"} + }, + "simulation_params": { + "macro_domain_bounds": [0.0, 25.0, 0.0, 25.0, 0.0, 25.0], + "adaptivity": { + "type": "local", + "data": ["macro-scalar-data", "macro-vector-data"], + "history_param": 0.5, + "coarsening_constant": 0.3, + "refining_constant": 0.4, + "every_implicit_iteration": "False", + "similarity_measure": "L1" + } + }, + "diagnostics": { + "output_micro_sim_solve_time": "True", + "micro_output_n": 10 + } +} diff --git a/tests/unit/test_micro_manager.py b/tests/unit/test_micro_manager.py index 75166333..ade67b32 100644 --- a/tests/unit/test_micro_manager.py +++ b/tests/unit/test_micro_manager.py @@ -5,29 +5,18 @@ class MicroSimulation: - def __init__(self, sim_id, crashing=False): + def __init__(self, sim_id): self.very_important_value = 0 - self.sim_id = sim_id - self.crashing = crashing - self.current_time = 0 - def initialize(self): pass def solve(self, macro_data, dt): - if not self.crashing: - assert macro_data["macro-scalar-data"] == 1 - assert macro_data["macro-vector-data"].tolist() == [0, 1, 2] - return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, - "micro-vector-data": macro_data["macro-vector-data"] + 1} - else: - if self.sim_id == 0: - self.current_time += dt - if self.current_time > dt: - raise Exception("Micro Simulation has crashed") - return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, - "micro-vector-data": macro_data["macro-vector-data"] + 1} + assert macro_data["macro-scalar-data"] == 1 + assert macro_data["macro-vector-data"].tolist() == [0, 1, 2] + return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, + "micro-vector-data": macro_data["macro-vector-data"] + 1} + class TestFunctioncalls(TestCase): def setUp(self): @@ -88,7 +77,7 @@ def test_read_write_data_from_precice(self): self.assertListEqual(data["macro-vector-data"].tolist(), fake_data["macro-vector-data"].tolist()) - def test_solve_micro_sims(self): + def test_solve_mico_sims(self): """ Test if the internal function _solve_micro_simulations works as expected. """ @@ -103,30 +92,6 @@ def test_solve_micro_sims(self): self.assertEqual(data["micro-scalar-data"], 2) self.assertListEqual(data["micro-vector-data"].tolist(), (fake_data["micro-vector-data"] + 1).tolist()) - def test_crash_handling(self): - """ - Test if the micro manager catches a simulation crash and handles it adequately. - """ - manager = micro_manager.MicroManager('micro-manager-config.json') - manager._local_number_of_sims = 4 - manager._micro_sims = [MicroSimulation(i, crashing = True) for i in range(4)] - manager._micro_sims_active_steps = np.zeros(4, dtype=np.int32) - # Momentarily, a simulation crash during the first step is not handled - micro_sims_output = manager._solve_micro_simulations(self.fake_read_data) - for i, data in enumerate(micro_sims_output): - self.fake_read_data[i]["macro-scalar-data"] = data["micro-scalar-data"] - self.fake_read_data[i]["macro-vector-data"] = data["micro-vector-data"] - micro_sims_output = manager._solve_micro_simulations(self.fake_read_data) - # The crashed simulation should have the same data as the previous step - data_crashed = micro_sims_output[0] - self.assertEqual(data_crashed["micro-scalar-data"], 2) - self.assertListEqual(data_crashed["micro-vector-data"].tolist(), - (self.fake_write_data[0]["micro-vector-data"] + 1).tolist()) - # Non-crashed simulations should have updated data - data_normal = micro_sims_output[1] - self.assertEqual(data_normal["micro-scalar-data"], 3) - self.assertListEqual(data_normal["micro-vector-data"].tolist(), - (self.fake_write_data[1]["micro-vector-data"] + 2).tolist()) def test_config(self): """ diff --git a/tests/unit/test_micro_simulation_crash_handling.py b/tests/unit/test_micro_simulation_crash_handling.py new file mode 100644 index 00000000..7f99379d --- /dev/null +++ b/tests/unit/test_micro_simulation_crash_handling.py @@ -0,0 +1,103 @@ +import numpy as np +from unittest import TestCase +import micro_manager + + +class MicroSimulation: + def __init__(self, sim_id): + self.very_important_value = 0 + self.sim_id = sim_id + self.current_time = 0 + + def initialize(self): + pass + + def solve(self, macro_data, dt): + if self.sim_id == 0: + self.current_time += dt + if self.current_time > dt: + raise Exception("Crash") + + return {"micro-scalar-data": macro_data["macro-scalar-data"] + 1, + "micro-vector-data": macro_data["macro-vector-data"] + 1} + + +class TestSimulationCrashHandling(TestCase): + def setUp(self): + self.fake_read_data_names = { + "macro-scalar-data": False, "macro-vector-data": True} + self.fake_read_data = [{"macro-scalar-data": 1, + "macro-vector-data": np.array([0, 1, 2])}] * 10 + self.fake_write_data = [{"micro-scalar-data": 1, + "micro-vector-data": np.array([0, 1, 2]), + "micro_sim_time": 0, + "active_state": 0, + "active_steps": 0}] * 10 + + def test_crash_handling(self): + """ + Test if the micro manager catches a simulation crash and handles it adequately. + """ + manager = micro_manager.MicroManager('micro-manager-config_crash.json') + + manager._local_number_of_sims = 10 + manager._crashed_sims = [False] * 10 + manager._micro_sims = [MicroSimulation(i) for i in range(10)] + manager._micro_sims_active_steps = np.zeros(10, dtype=np.int32) + # Crash during first time step has to be handled differently + + micro_sims_output = manager._solve_micro_simulations( + self.fake_read_data) + for i, data in enumerate(micro_sims_output): + self.fake_read_data[i]["macro-scalar-data"] = data["micro-scalar-data"] + self.fake_read_data[i]["macro-vector-data"] = data["micro-vector-data"] + micro_sims_output = manager._solve_micro_simulations( + self.fake_read_data) + # The crashed simulation should have the same data as the previous step + data_crashed = micro_sims_output[0] + self.assertEqual(data_crashed["micro-scalar-data"], 2) + self.assertListEqual(data_crashed["micro-vector-data"].tolist(), + (self.fake_write_data[0]["micro-vector-data"] + 1).tolist()) + # Non-crashed simulations should have updated data + data_normal = micro_sims_output[1] + self.assertEqual(data_normal["micro-scalar-data"], 3) + self.assertListEqual(data_normal["micro-vector-data"].tolist(), + (self.fake_write_data[1]["micro-vector-data"] + 2).tolist()) + + def test_crash_handling_with_adaptivity(self): + """ + Test if the micro manager catches a simulation crash and handles it adequately with adaptivity. + """ + manager = micro_manager.MicroManager('micro-manager-config_crash.json') + + manager._local_number_of_sims = 10 + manager._crashed_sims = [False] * 10 + manager._micro_sims = [MicroSimulation(i) for i in range(10)] + manager._micro_sims_active_steps = np.zeros(10, dtype=np.int32) + is_sim_active = np.array( + [True, True, False, True, False, False, False, True, True, False,]) + sim_is_associated_to = np.array([-2, -2, 1, -2, 3, 3, 0, -2, -2, 8]) + # Crash in the first time step is handled differently + + micro_sims_output = manager._solve_micro_simulations_with_adaptivity( + self.fake_read_data, is_sim_active, sim_is_associated_to) + for i, data in enumerate(micro_sims_output): + self.fake_read_data[i]["macro-scalar-data"] = data["micro-scalar-data"] + self.fake_read_data[i]["macro-vector-data"] = data["micro-vector-data"] + micro_sims_output = manager._solve_micro_simulations_with_adaptivity( + self.fake_read_data, is_sim_active, sim_is_associated_to) + # The crashed simulation should have the same data as the previous step + data_crashed = micro_sims_output[0] + self.assertEqual(data_crashed["micro-scalar-data"], 2) + self.assertListEqual(data_crashed["micro-vector-data"].tolist(), + (self.fake_write_data[0]["micro-vector-data"] + 1).tolist()) + # Non-crashed simulations should have updated data + data_normal = micro_sims_output[1] + self.assertEqual(data_normal["micro-scalar-data"], 3) + self.assertListEqual(data_normal["micro-vector-data"].tolist(), + (self.fake_write_data[1]["micro-vector-data"] + 2).tolist()) + + +if __name__ == '__main__': + import unittest + unittest.main() From 926823dcc8d08d0c23686f0f995b0874f9a216a8 Mon Sep 17 00:00:00 2001 From: Torben Schiz Date: Thu, 21 Mar 2024 11:25:07 +0200 Subject: [PATCH 5/6] Adapt formatting of tests --- tests/unit/test_micro_manager.py | 2 +- tests/unit/test_micro_simulation_crash_handling.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_micro_manager.py b/tests/unit/test_micro_manager.py index ade67b32..e7ccfb8e 100644 --- a/tests/unit/test_micro_manager.py +++ b/tests/unit/test_micro_manager.py @@ -77,7 +77,7 @@ def test_read_write_data_from_precice(self): self.assertListEqual(data["macro-vector-data"].tolist(), fake_data["macro-vector-data"].tolist()) - def test_solve_mico_sims(self): + def test_solve_micro_sims(self): """ Test if the internal function _solve_micro_simulations works as expected. """ diff --git a/tests/unit/test_micro_simulation_crash_handling.py b/tests/unit/test_micro_simulation_crash_handling.py index 7f99379d..308e315b 100644 --- a/tests/unit/test_micro_simulation_crash_handling.py +++ b/tests/unit/test_micro_simulation_crash_handling.py @@ -75,7 +75,7 @@ def test_crash_handling_with_adaptivity(self): manager._micro_sims = [MicroSimulation(i) for i in range(10)] manager._micro_sims_active_steps = np.zeros(10, dtype=np.int32) is_sim_active = np.array( - [True, True, False, True, False, False, False, True, True, False,]) + [True, True, False, True, False, False, False, True, True, False]) sim_is_associated_to = np.array([-2, -2, 1, -2, 3, 3, 0, -2, -2, 8]) # Crash in the first time step is handled differently From 1bd5c34115abff8d7ea19affcddd9d6a96a2a5fa Mon Sep 17 00:00:00 2001 From: Torben Schiz <49746900+tjwsch@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:28:13 +0100 Subject: [PATCH 6/6] Update logger message for crashing simulation in the first run Co-authored-by: Ishaan Desai --- micro_manager/micro_manager.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/micro_manager/micro_manager.py b/micro_manager/micro_manager.py index 36feff8a..6aceeb06 100644 --- a/micro_manager/micro_manager.py +++ b/micro_manager/micro_manager.py @@ -471,9 +471,8 @@ def _solve_micro_simulations(self, micro_sims_input: list) -> list: unset_sims = np.where(none_mask)[0] for unset_sims in unset_sims: - self._logger.info("Micro Sim {} has previously not run. " - "It will be replace with the output of the first " - "micro sim that ran {}".format(unset_sims, set_sims[0][0])) + self._logger.info("Micro simulation {} has has crashed in the very first run attempt. " + "The output of the first micro sim that ran ({}) will be used as its output.".format(unset_sims, set_sims[0][0])) micro_sims_output[unset_sims] = micro_sims_output[set_sims[0][0]] self._old_micro_sims_output = micro_sims_output