From a01e57b48aac080b414396dfd51d9fe43a756bcd Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Tue, 28 Jan 2025 21:18:19 -0500 Subject: [PATCH 1/9] tests: Cleanup use of per-node compilation mode Signed-off-by: Jan Vesely --- tests/mechanisms/test_recurrent_transfer_mechanism.py | 9 ++++----- tests/scheduling/test_condition.py | 7 ++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/mechanisms/test_recurrent_transfer_mechanism.py b/tests/mechanisms/test_recurrent_transfer_mechanism.py index 7a60d775c9..0e7d2f851c 100644 --- a/tests/mechanisms/test_recurrent_transfer_mechanism.py +++ b/tests/mechanisms/test_recurrent_transfer_mechanism.py @@ -1179,8 +1179,8 @@ def test_reset_stateful_function_when_has_initializers_composition(self, comp_mo @pytest.mark.composition @pytest.mark.integrator_mechanism @pytest.mark.parametrize('until_finished, expected', [ - (True, [[[[0.96875]]], [[[0.9990234375]]]]), # The 5th and the 10th iteration - (False, [[[[0.5]]], [[[0.75]]]]), # The first and the second iteration + (True, [[[0.96875]], [[0.9990234375]]]), # The 5th and the 10th iteration + (False, [[[0.5]], [[0.75]]]), # The first and the second iteration ], ids=['until_finished', 'oneshot']) # 'LLVM' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. @@ -1197,10 +1197,9 @@ def test_max_executions_before_finished(self, comp_mode, until_finished, expecte results = C.run(inputs={I1: [[1.0]]}, num_trials=1, execution_mode=comp_mode) if comp_mode is pnl.ExecutionMode.Python: assert I1.parameters.is_finished_flag.get(C) is until_finished + results2 = C.run(inputs={I1: [[1.0]]}, num_trials=1, execution_mode=comp_mode) - if comp_mode is not pnl.ExecutionMode.LLVM: - results = [results] - results2 = [results2] + np.testing.assert_allclose(expected[0], results) np.testing.assert_allclose(expected[1], results2) diff --git a/tests/scheduling/test_condition.py b/tests/scheduling/test_condition.py index a823f534f2..5196fd46f9 100644 --- a/tests/scheduling/test_condition.py +++ b/tests/scheduling/test_condition.py @@ -761,12 +761,10 @@ def test_Threshold_parameters( ('!=', -1, 0, [[[-1]]]), ] ) + @pytest.mark.usefixtures("comp_mode_no_llvm") # Per-node mode doesn't support Parameter access in conditions def test_Threshold_comparators( self, comparator, increment, threshold, expected_results, comp_mode ): - if comp_mode is pnl.ExecutionMode.LLVM: - pytest.skip('ExecutionMode.LLVM does not support Parameter access in conditions') - A = TransferMechanism( integrator_mode=True, integrator_function=pnl.AccumulatorIntegrator(rate=1, increment=increment), @@ -796,11 +794,10 @@ def test_Threshold_comparators( ('!=', -1, -1, 0, 1, [[[-3]]]), ] ) + @pytest.mark.usefixtures("comp_mode_no_llvm") # Per-node mode doesn't support Parameter access in conditions def test_Threshold_tolerances( self, comparator, increment, threshold, atol, rtol, expected_results, comp_mode ): - if comp_mode is pnl.ExecutionMode.LLVM: - pytest.skip('ExecutionMode.LLVM does not support Parameter access in conditions') A = TransferMechanism( integrator_mode=True, From fdcbd98aab1803701147b577a6990c48ad9e5316 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Tue, 28 Jan 2025 23:26:33 -0500 Subject: [PATCH 2/9] llvm: Rename ExecutionMode.PTX -> ExecutionMode._PTX This mode is not supposed to be used outside of its combination in PTXRun. Signed-off-by: Jan Vesely --- psyneulink/core/compositions/composition.py | 2 +- psyneulink/core/llvm/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 5f4b24dfc2..39fedaeebc 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -11432,7 +11432,7 @@ def run( _comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags) if execution_mode & pnlvm.ExecutionMode.LLVM: results += _comp_ex.run(inputs, num_trials, num_inputs_sets) - elif execution_mode & pnlvm.ExecutionMode.PTX: + elif execution_mode & pnlvm.ExecutionMode._PTX: results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets) else: assert False, "Unknown execution mode: {}".format(execution_mode) diff --git a/psyneulink/core/llvm/__init__.py b/psyneulink/core/llvm/__init__.py index f46508e71c..5649cecc2e 100644 --- a/psyneulink/core/llvm/__init__.py +++ b/psyneulink/core/llvm/__init__.py @@ -77,7 +77,7 @@ class ExecutionMode(enum.Flag): Python = 0 PyTorch = enum.auto() LLVM = enum.auto() - PTX = enum.auto() + _PTX = enum.auto() _Run = enum.auto() _Exec = enum.auto() _Fallback = enum.auto() @@ -85,7 +85,7 @@ class ExecutionMode(enum.Flag): Auto = _Fallback | _Run | _Exec | LLVM LLVMRun = LLVM | _Run LLVMExec = LLVM | _Exec - PTXRun = PTX | _Run + PTXRun = _PTX | _Run COMPILED = ~ (Python | PyTorch) From 96e4a78f812f5e7c3cae740181be34539e9bf4da Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Wed, 29 Jan 2025 12:11:55 -0500 Subject: [PATCH 3/9] tests/predator_prey: Consolidate check for compiled execution of large test instances Signed-off-by: Jan Vesely --- tests/models/test_greedy_agent.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/models/test_greedy_agent.py b/tests/models/test_greedy_agent.py index 5a94786a41..89452373e6 100644 --- a/tests/models/test_greedy_agent.py +++ b/tests/models/test_greedy_agent.py @@ -101,10 +101,9 @@ def test_simplified_greedy_agent_random(benchmark, comp_mode): @pytest.mark.parametrize('prng', ['Default', 'Philox']) @pytest.mark.parametrize('fp_type', [pnl.core.llvm.ir.DoubleType, pnl.core.llvm.ir.FloatType]) def test_predator_prey(benchmark, mode, ocm_mode, prng, samples, fp_type): - if len(samples) > 10 and mode not in {pnl.ExecutionMode.LLVM, - pnl.ExecutionMode.LLVMExec, - pnl.ExecutionMode.LLVMRun} and \ - ocm_mode not in {'LLVM', 'PTX'}: + + # Skip large test instances that are not CPU compiled, or executed in parallel. + if len(samples) > 10 and not (mode & pnl.ExecutionMode.LLVM) and ocm_mode not in {'LLVM', 'PTX'}: pytest.skip("This test takes too long") # Instantiate LLVMBuilderContext using the preferred fp type From c428319f99aef9534f8409f5d8a86eb082e62dbc Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Wed, 29 Jan 2025 12:13:34 -0500 Subject: [PATCH 4/9] llvm: Rename ExecutionMode.LLVMExec -> ExecutionMode._LLVMExec This mode should not be used outside development or testing. Signed-off-by: Jan Vesely --- conftest.py | 2 +- docs/source/Compilation.rst | 2 +- psyneulink/core/compositions/composition.py | 5 +++-- psyneulink/core/llvm/__init__.py | 4 ++-- tests/composition/test_composition.py | 6 +++--- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/conftest.py b/conftest.py index 2761218504..1114dde65e 100644 --- a/conftest.py +++ b/conftest.py @@ -188,7 +188,7 @@ def llvm_current_fp_precision(): def get_comp_execution_modes(): return [pytest.param(pnlvm.ExecutionMode.Python), pytest.param(pnlvm.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnlvm.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), + pytest.param(pnlvm.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.PTXRun, marks=[pytest.mark.llvm, pytest.mark.cuda]) ] diff --git a/docs/source/Compilation.rst b/docs/source/Compilation.rst index ce9d9e3847..1460cdb73f 100644 --- a/docs/source/Compilation.rst +++ b/docs/source/Compilation.rst @@ -34,7 +34,7 @@ Compiled form of a model can be invoked by passing one of the following values t * `ExecutionMode.Python`: Normal python execution * `ExecutionMode.LLVM`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly. - * `ExecutionMode.LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. + * `ExecutionMode._LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. * `ExecutionMode.LLVMRun`: Execution of `Composition.run` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. * `ExecutionMode.Auto`: This option attempts all three above mentioned granularities, and gracefully falls back to lower granularity. Warnings are raised in place of errors. This is the recommended way to invoke compiled execution as the final fallback is the Python baseline. diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 39fedaeebc..6e72354ccf 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -2067,14 +2067,15 @@ def input_function(env, result): the compiled binary is semantically equivalent to the execution of the `run ` method using the Python interpreter; - * `ExecutionMode.LLVMExec` -- compile and run each `TRIAL `, using the Python interpreter + * `ExecutionMode._LLVMExec` -- compile and run each `TRIAL `, using the Python interpreter to iterate over them; if successful, the compiled binary for each `TRIAL ` is semantically equivalent the execution of the `execute ` method using the Python interpreter; + This mode does not support Trial scope scheduling rules and should not be used outside of development or testing. * `ExecutionMode.LLVM` -- compile and run `Node ` of the `Composition` and their `Projections `, using the Python interpreter to call the Composition's `scheduler `, execute each Node and iterate over `TRIAL `\\s; note that, in this mode, scheduling - `Conditions ` that rely on Node `Parameters` is not supported; + `Conditions ` that rely on Node `Parameters` are not supported; * `ExecutionMode.Python` (same as *False*; the default) -- use the Python interpreter to execute the `Composition`. diff --git a/psyneulink/core/llvm/__init__.py b/psyneulink/core/llvm/__init__.py index 5649cecc2e..a48accb3f0 100644 --- a/psyneulink/core/llvm/__init__.py +++ b/psyneulink/core/llvm/__init__.py @@ -49,7 +49,7 @@ class ExecutionMode(enum.Flag): LLVM compile and run Composition `Nodes ` and `Projections ` individually. - LLVMExec + _LLVMExec compile and run each `TRIAL ` individually. LLVMRun @@ -84,7 +84,7 @@ class ExecutionMode(enum.Flag): Auto = _Fallback | _Run | _Exec | LLVM LLVMRun = LLVM | _Run - LLVMExec = LLVM | _Exec + _LLVMExec = LLVM | _Exec PTXRun = _PTX | _Run COMPILED = ~ (Python | PyTorch) diff --git a/tests/composition/test_composition.py b/tests/composition/test_composition.py index 15fb27a135..3b5fef1457 100644 --- a/tests/composition/test_composition.py +++ b/tests/composition/test_composition.py @@ -3765,7 +3765,7 @@ def test_run_2_mechanisms_double_trial_specs(self, comp_mode): @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), ]) def test_execute_composition(self, mode): comp = Composition() @@ -3865,7 +3865,7 @@ def test_LPP_wrong_component(self): @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), ]) def test_execute_no_inputs(self, mode): m_inner = ProcessingMechanism(input_shapes=2) @@ -6608,7 +6608,7 @@ class TestProperties: @pytest.mark.composition @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Auto, pnl.ExecutionMode.Python, pytest.param(pnl.ExecutionMode.LLVM, marks=[_fallback_xfail, pytest.mark.llvm]), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=[_fallback_xfail, pytest.mark.llvm]), + pytest.param(pnl.ExecutionMode._LLVMExec, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode.LLVMRun, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode.PTXRun, marks=[_fallback_xfail, pytest.mark.llvm, pytest.mark.cuda]), ]) From 148d2b41c036574777dce506be094fc038f0c4a6 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Tue, 28 Jan 2025 21:01:01 -0500 Subject: [PATCH 5/9] llvm, Composition: Do not fallback to per-node execution mode It does not support non-trivial scheduling. Signed-off-by: Jan Vesely --- psyneulink/core/compositions/composition.py | 60 +++++++++------------ 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 6e72354ccf..694f5eec27 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -2060,8 +2060,8 @@ def input_function(env, result): .. _Composition_Compilation_Modes: - * *True* -- try to use the one that yields the greatesst improvement, progressively reverting to less powerful - but more forgiving modes, in the order listed below, for each that fails; + * *True* -- try to use the one that yields the greatest improvement, progressively reverting to less powerful + but more forgiving modes, trying LLVMRun, _LLVMExec, and Python. * `ExecutionMode.LLVMRun` - compile and run multiple `TRIAL `\\s; if successful, the compiled binary is semantically equivalent to the execution of the `run ` method @@ -2088,7 +2088,7 @@ def input_function(env, result): using it with a standard `Composition` is possible, but it will **not** have the expected effect of executing its `learn ` method using PyTorch. - * `ExecutionMode.PTXrun` -- compile multiple `TRIAL `\\s for execution on GPU + * `ExecutionMode.PTXRun` -- compile multiple `TRIAL `\\s for execution on GPU (see `below ` for additional details). .. _Composition_Compilation_PyTorch: @@ -12055,30 +12055,27 @@ def execute( self._initialize_from_context(context, base_context, override=False) context.composition = self - # Run compiled execution (if compiled execution was requested + # Try compiled execution (if compiled execution was requested) # NOTE: This should be as high up as possible, # but still after the context has been initialized if execution_mode & pnlvm.ExecutionMode.COMPILED: - is_simulation = (context is not None and - ContextFlags.SIMULATION_MODE in context.runmode) - # Try running in Exec mode first - if (execution_mode & pnlvm.ExecutionMode._Exec): - # There's no mode to execute simulations. + + is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode) + + _comp_ex = pnlvm.CompExecution.get(self, context) + + if execution_mode & pnlvm.ExecutionMode._Exec: + # There's no mode to execute compiled simulations. # Simulations are run as part of the controller node wrapper. assert not is_simulation + try: + assert execution_mode & pnlvm.ExecutionMode.LLVM, "Unsupported execution mode: {}".format(execution_mode) + llvm_inputs = self._validate_execution_inputs(inputs) - _comp_ex = pnlvm.CompExecution.get(self, context) - if execution_mode & pnlvm.ExecutionMode.LLVM: - _comp_ex.execute(llvm_inputs) - else: - assert False, "Unknown execution mode: {}".format(execution_mode) + _comp_ex.execute(llvm_inputs) - report(self, - PROGRESS_REPORT, - report_num=report_num, - content='trial_end', - context=context) + report(self, PROGRESS_REPORT, report_num=report_num, content='trial_end', context=context) self._propagate_most_recent_context(context) return _comp_ex.extract_node_output(self.output_CIM) @@ -12087,27 +12084,18 @@ def execute( if not execution_mode & pnlvm.ExecutionMode._Fallback: raise e from None - warnings.warn("Failed to execute `{}': {}".format(self.name, str(e))) + warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(self.name, self.name, str(e))) + execution_mode = pnlvm.ExecutionMode.Python - # Exec failed for some reason, we can still try node level execution_mode - # Filter out nested compositions. They are not executed in this mode - # Filter out controller if running simulation. - mechanisms = (n for n in self._all_nodes - if isinstance(n, Mechanism) and - (n is not self.controller or not is_simulation)) + elif execution_mode is pnlvm.ExecutionMode.LLVM: - assert execution_mode & pnlvm.ExecutionMode.LLVM - try: - _comp_ex = pnlvm.CompExecution.get(self, context) # Compile all mechanism wrappers - for m in mechanisms: - _comp_ex._set_bin_node(m) - except Exception as e: - if not execution_mode & pnlvm.ExecutionMode._Fallback: - raise e from None + for m in self._all_nodes: + if isinstance(m, Mechanism) and not (m is self.controller and is_simulation): + _comp_ex._set_bin_node(m) - warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(m.name, self.name, str(e))) - execution_mode = pnlvm.ExecutionMode.Python + else: + assert False, "Unsupported execution mode: {}".format(execution_mode) # Generate first frame of animation without any active_items From 77e0f836c762c528a48f15a70bffa9aee19c8e96 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Fri, 31 Jan 2025 10:41:15 -0500 Subject: [PATCH 6/9] llvm: Add _LLVMPerNode mode Explicitly list composition compilation mode. Make the new mode private. Only used by developers and tests. Signed-off-by: Jan Vesely --- conftest.py | 11 ++++--- docs/source/Compilation.rst | 2 +- psyneulink/core/compositions/composition.py | 29 +++++++++++++------ psyneulink/core/llvm/__init__.py | 4 ++- .../compositions/autodiffcomposition.py | 4 +-- tests/composition/test_composition.py | 10 +++---- tests/composition/test_control.py | 8 ++--- tests/mechanisms/test_ddm_mechanism.py | 12 ++++---- tests/mechanisms/test_integrator_mechanism.py | 4 +-- tests/mechanisms/test_mechanisms.py | 2 +- .../test_recurrent_transfer_mechanism.py | 12 ++++---- tests/mechanisms/test_transfer_mechanism.py | 4 +-- tests/ports/test_output_ports.py | 2 +- tests/scheduling/test_condition.py | 6 ++-- tests/scheduling/test_scheduler.py | 4 +-- 15 files changed, 63 insertions(+), 51 deletions(-) diff --git a/conftest.py b/conftest.py index 1114dde65e..6ee8bac6fa 100644 --- a/conftest.py +++ b/conftest.py @@ -71,8 +71,7 @@ def pytest_runtest_setup(item): def pytest_generate_tests(metafunc): mech_and_func_modes = ['Python', pytest.param('LLVM', marks=pytest.mark.llvm), - pytest.param('PTX', marks=[pytest.mark.llvm, - pytest.mark.cuda]) + pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda]) ] if "func_mode" in metafunc.fixturenames: @@ -81,9 +80,9 @@ def pytest_generate_tests(metafunc): if "mech_mode" in metafunc.fixturenames: metafunc.parametrize("mech_mode", mech_and_func_modes) - if "comp_mode_no_llvm" in metafunc.fixturenames: + if "comp_mode_no_per_node" in metafunc.fixturenames: modes = [m for m in get_comp_execution_modes() - if m.values[0] is not pnlvm.ExecutionMode.LLVM] + if m.values[0] is not pnlvm.ExecutionMode._LLVMPerNode] metafunc.parametrize("comp_mode", modes) elif "comp_mode" in metafunc.fixturenames: @@ -151,7 +150,7 @@ def pytest_runtest_teardown(item): pnlvm.cleanup("llvm" in item.keywords and not skip_cleanup_check) @pytest.fixture -def comp_mode_no_llvm(): +def comp_mode_no_per_node(): # dummy fixture to allow 'comp_mode' filtering pass @@ -187,7 +186,7 @@ def llvm_current_fp_precision(): @pytest.helpers.register def get_comp_execution_modes(): return [pytest.param(pnlvm.ExecutionMode.Python), - pytest.param(pnlvm.ExecutionMode.LLVM, marks=pytest.mark.llvm), + pytest.param(pnlvm.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.PTXRun, marks=[pytest.mark.llvm, pytest.mark.cuda]) diff --git a/docs/source/Compilation.rst b/docs/source/Compilation.rst index 1460cdb73f..239c743038 100644 --- a/docs/source/Compilation.rst +++ b/docs/source/Compilation.rst @@ -33,7 +33,7 @@ Use Compiled form of a model can be invoked by passing one of the following values to the `bin_execute` parameter of `Composition.run`, or `Composition.exec`: * `ExecutionMode.Python`: Normal python execution - * `ExecutionMode.LLVM`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly. + * `ExecutionMode._LLVMPerNode`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly. * `ExecutionMode._LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. * `ExecutionMode.LLVMRun`: Execution of `Composition.run` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. * `ExecutionMode.Auto`: This option attempts all three above mentioned granularities, and gracefully falls back to lower granularity. Warnings are raised in place of errors. This is the recommended way to invoke compiled execution as the final fallback is the Python baseline. diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 694f5eec27..0aa77a8352 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -1122,7 +1122,7 @@ an AutodffComposition to be run in any mode (e.g., for comparison and/or compatibility purposes). .. warning:: - * `ExecutionMode.LLVM` and `ExecutionMode.PyTorch` can only be used in the `learn ` + * `ExecutionMode.LLVMRun` and `ExecutionMode.PyTorch` can only be used in the `learn ` method of an `AutodiffComposition`; specifying them in the `learn `()` method of a standard `Composition` causes an error. @@ -2072,7 +2072,7 @@ def input_function(env, result): equivalent the execution of the `execute ` method using the Python interpreter; This mode does not support Trial scope scheduling rules and should not be used outside of development or testing. - * `ExecutionMode.LLVM` -- compile and run `Node ` of the `Composition` and their `Projections + * `ExecutionMode._LLVMPerNode` -- compile and run `Node ` of the `Composition` and their `Projections `, using the Python interpreter to call the Composition's `scheduler `, execute each Node and iterate over `TRIAL `\\s; note that, in this mode, scheduling `Conditions ` that rely on Node `Parameters` are not supported; @@ -11837,12 +11837,13 @@ def _execute_controller(self, node=self.controller) if self.controller and not execution_mode & pnlvm.ExecutionMode.COMPILED: + context.execution_phase = ContextFlags.PROCESSING self.controller.execute(context=context) else: - assert (execution_mode == pnlvm.ExecutionMode.LLVM - or execution_mode & pnlvm.ExecutionMode._Fallback),\ + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode & pnlvm.ExecutionMode.LLVM, \ f"PROGRAM ERROR: Unrecognized compiled execution_mode: '{execution_mode}'." _comp_ex.freeze_values() _comp_ex.execute_node(self.controller) @@ -12060,6 +12061,8 @@ def execute( # but still after the context has been initialized if execution_mode & pnlvm.ExecutionMode.COMPILED: + assert execution_mode & pnlvm.ExecutionMode.LLVM, "Unsupported execution mode: {}".format(execution_mode) + is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode) _comp_ex = pnlvm.CompExecution.get(self, context) @@ -12070,8 +12073,6 @@ def execute( assert not is_simulation try: - assert execution_mode & pnlvm.ExecutionMode.LLVM, "Unsupported execution mode: {}".format(execution_mode) - llvm_inputs = self._validate_execution_inputs(inputs) _comp_ex.execute(llvm_inputs) @@ -12087,7 +12088,7 @@ def execute( warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(self.name, self.name, str(e))) execution_mode = pnlvm.ExecutionMode.Python - elif execution_mode is pnlvm.ExecutionMode.LLVM: + elif execution_mode & pnlvm.ExecutionMode._PerNode: # Compile all mechanism wrappers for m in self._all_nodes: @@ -12161,11 +12162,13 @@ def execute( build_CIM_input = self._build_variable_for_input_CIM(inputs) if execution_mode & pnlvm.ExecutionMode.COMPILED: - _comp_ex.execute_node(self.input_CIM, inputs) # FIXME: parameter_CIM should be executed here as well, # but node execution of nested compositions with # outside control is not supported yet. assert not self.is_nested or len(self.parameter_CIM.afferents) == 0 + assert execution_mode & pnlvm.ExecutionMode._PerNode + + _comp_ex.execute_node(self.input_CIM, inputs) elif self.is_nested: simulation = ContextFlags.SIMULATION_MODE in context.runmode @@ -12365,6 +12368,7 @@ def execute( frozen_values = {} new_values = {} if execution_mode & pnlvm.ExecutionMode.COMPILED: + assert execution_mode & pnlvm.ExecutionMode._PerNode _comp_ex.freeze_values() # PURGE LEARNING IF NOT ENABLED ---------------------------------------------------------------- @@ -12448,7 +12452,9 @@ def execute( # Execute Mechanism if execution_mode & pnlvm.ExecutionMode.COMPILED: + assert execution_mode & pnlvm.ExecutionMode._PerNode _comp_ex.execute_node(node) + else: if node is not self.controller: mech_context = copy(context) @@ -12475,6 +12481,8 @@ def execute( elif isinstance(node, Composition): if execution_mode & pnlvm.ExecutionMode.COMPILED: + assert execution_mode & pnlvm.ExecutionMode._PerNode + # Invoking nested composition passes data via Python # structures. Make sure all sources get their latest values srcs = (proj.sender.owner for proj in node.input_CIM.afferents) @@ -12507,7 +12515,7 @@ def execute( # Run node-level compiled nested composition # only if there are no control projections - if execution_mode == pnlvm.ExecutionMode.LLVM and len(node.parameter_CIM.afferents) != 0: + if execution_mode & pnlvm.ExecutionMode.LLVM and len(node.parameter_CIM.afferents) != 0: nested_execution_mode = pnlvm.ExecutionMode.Python else: nested_execution_mode = execution_mode @@ -12516,6 +12524,8 @@ def execute( # Get output info from nested execution if execution_mode & pnlvm.ExecutionMode.COMPILED: + assert execution_mode & pnlvm.ExecutionMode._PerNode + # Update result in binary data structure _comp_ex.insert_node_output(node, ret) @@ -12659,6 +12669,7 @@ def execute( # Extract result here if execution_mode & pnlvm.ExecutionMode.COMPILED: + assert execution_mode & pnlvm.ExecutionMode._PerNode _comp_ex.freeze_values() _comp_ex.execute_node(self.output_CIM) report(self, diff --git a/psyneulink/core/llvm/__init__.py b/psyneulink/core/llvm/__init__.py index a48accb3f0..95a22fcc4f 100644 --- a/psyneulink/core/llvm/__init__.py +++ b/psyneulink/core/llvm/__init__.py @@ -78,13 +78,15 @@ class ExecutionMode(enum.Flag): PyTorch = enum.auto() LLVM = enum.auto() _PTX = enum.auto() - _Run = enum.auto() + _PerNode = enum.auto() _Exec = enum.auto() + _Run = enum.auto() _Fallback = enum.auto() Auto = _Fallback | _Run | _Exec | LLVM LLVMRun = LLVM | _Run _LLVMExec = LLVM | _Exec + _LLVMPerNode = LLVM | _PerNode PTXRun = _PTX | _Run COMPILED = ~ (Python | PyTorch) diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 893fb1b17e..2909fee763 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -184,7 +184,7 @@ during execution (see `AutodiffComposition_Nested_Modulation` below), which is not supported by PyTorch. .. warning:: - * Specifying `ExecutionMode.LLVM` or `ExecutionMode.PyTorch` in the learn() method of a standard + * Specifying `ExecutionMode.LLVMRun` or `ExecutionMode.PyTorch` in the learn() method of a standard `Composition` causes an error. COMMENT: @@ -204,7 +204,7 @@ `Compilation Modes ` for more information about executing a Composition in compiled mode. .. note:: - Specifying `ExecutionMode.LLVMRUn` in either the `learn ` and `run ` + Specifying `ExecutionMode.LLVMRun` in either the `learn ` and `run ` methods of an AutodiffComposition causes it to (attempt to) use compiled execution in both cases; this is because LLVM compilation supports the use of modulation in PsyNeuLink models (as compared to `PyTorch mode `; see `note ` below). diff --git a/tests/composition/test_composition.py b/tests/composition/test_composition.py index 3b5fef1457..65df4cd2a4 100644 --- a/tests/composition/test_composition.py +++ b/tests/composition/test_composition.py @@ -3764,7 +3764,7 @@ def test_run_2_mechanisms_double_trial_specs(self, comp_mode): np.testing.assert_allclose(np.array([[75.]]), output) @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm), pytest.param(pnl.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), ]) def test_execute_composition(self, mode): @@ -3864,7 +3864,7 @@ def test_LPP_wrong_component(self): and "that is in deferred init" in str(error_text.value)) @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm), pytest.param(pnl.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), ]) def test_execute_no_inputs(self, mode): @@ -4423,8 +4423,8 @@ def test_multiple_runs_with_parameter_change_from_data_struct(self, comp_mode): self._check_comp_ex(comp, None, comp_mode, struct_name, is_not=True) self._check_comp_ex(comp, orig_comp_ex, comp_mode, struct_name, is_not=True) - @pytest.mark.usefixtures("comp_mode_no_llvm") - @pytest.mark.parametrize("comp_mode2", [m for m in pytest.helpers.get_comp_execution_modes() if m.values[0] is not pnl.ExecutionMode.LLVM]) + @pytest.mark.usefixtures("comp_mode_no_per_node") + @pytest.mark.parametrize("comp_mode2", [m for m in pytest.helpers.get_comp_execution_modes() if m.values[0] is not pnl.ExecutionMode._LLVMPerNode]) def test_execution_after_cleanup_enum_param(self, comp_mode, comp_mode2): """ This test checks that compiled sync works for Parameters with Enum values. @@ -6607,7 +6607,7 @@ class TestProperties: @pytest.mark.composition @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Auto, pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=[_fallback_xfail, pytest.mark.llvm]), + pytest.param(pnl.ExecutionMode._LLVMPerNode, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode._LLVMExec, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode.LLVMRun, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode.PTXRun, marks=[_fallback_xfail, pytest.mark.llvm, pytest.mark.cuda]), diff --git a/tests/composition/test_control.py b/tests/composition/test_control.py index b473953d9f..d3f070d729 100644 --- a/tests/composition/test_control.py +++ b/tests/composition/test_control.py @@ -2507,9 +2507,9 @@ def get_val(s, dty): @pytest.mark.benchmark @pytest.mark.control @pytest.mark.composition - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") @pytest.mark.parametrize('prng', ['Default', 'Philox']) def test_modulation_of_random_state_DDM(self, comp_mode, benchmark, prng): # set explicit seed to make sure modulation is different @@ -2622,9 +2622,9 @@ def test_modulation_of_initializer_nested(self, ocm_mode): @pytest.mark.benchmark @pytest.mark.control @pytest.mark.composition - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") @pytest.mark.parametrize('prng', ['Default', 'Philox']) def test_modulation_of_random_state_DDM_Analytical(self, comp_mode, benchmark, prng): # set explicit seed to make sure modulation is different diff --git a/tests/mechanisms/test_ddm_mechanism.py b/tests/mechanisms/test_ddm_mechanism.py index 50c1d2eef7..5ed8cea5f5 100644 --- a/tests/mechanisms/test_ddm_mechanism.py +++ b/tests/mechanisms/test_ddm_mechanism.py @@ -693,9 +693,9 @@ def test_DDM_threshold_modulation_integrator(comp_mode): (100.0, 100.0, [[100.0], [76.0]]), ]) # 3/5/2021 - DDM' default behaviour now requires resetting stateful -# functions after each trial. This is not supported in LLVM execution mode. +# functions after each trial. This is not supported in _LLVMPerNode execution mode. # See: https://github.com/PrincetonUniversity/PsyNeuLink/issues/1935 -@pytest.mark.usefixtures("comp_mode_no_llvm") +@pytest.mark.usefixtures("comp_mode_no_per_node") def test_ddm_is_finished(comp_mode, noise, threshold, expected_results): comp = Composition() @@ -711,11 +711,11 @@ def test_ddm_is_finished(comp_mode, noise, threshold, expected_results): @pytest.mark.parametrize("until_finished", ["until_finished", "not_until_finished"]) @pytest.mark.parametrize("threshold_mod", ["threshold_modulated", "threshold_not_modulated"]) # 3/5/2021 - DDM' default behaviour now requires resetting stateful -# functions after each trial. This is not supported in LLVM execution mode. +# functions after each trial. This is not supported in _LLVMPerNode execution mode. # See: https://github.com/PrincetonUniversity/PsyNeuLink/issues/1935 # Moreover, evaluating scheduler conditions in Python is not supported # for compiled execution -@pytest.mark.usefixtures("comp_mode_no_llvm") +@pytest.mark.usefixtures("comp_mode_no_per_node") def test_ddm_is_finished_with_dependency(comp_mode, until_finished, threshold_mod): comp = Composition() @@ -801,9 +801,9 @@ def test_sequence_of_DDM_mechs_in_Composition_Pathway(): @pytest.mark.composition @pytest.mark.ddm_mechanism # 3/5/2021 - DDM' default behaviour now requires resetting stateful -# functions after each trial. This is not supported in LLVM execution mode. +# functions after each trial. This is not supported in _LLVMPerNode execution mode. # See: https://github.com/PrincetonUniversity/PsyNeuLink/issues/1935 -@pytest.mark.usefixtures("comp_mode_no_llvm") +@pytest.mark.usefixtures("comp_mode_no_per_node") def test_DDMMechanism_LCA_equivalent(comp_mode): ddm = DDM(default_variable=[0], diff --git a/tests/mechanisms/test_integrator_mechanism.py b/tests/mechanisms/test_integrator_mechanism.py index 414c6837f1..e7f616b452 100644 --- a/tests/mechanisms/test_integrator_mechanism.py +++ b/tests/mechanisms/test_integrator_mechanism.py @@ -1233,9 +1233,9 @@ def test_has_initializers(self): [np.array([0.5]), np.array([0.9375])], [np.array([0.5]), np.array([0.96875])]]), ], ids=lambda x: str(x) if isinstance(x, pnl.Condition) else "") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, expected): I1 = pnl.IntegratorMechanism() I2 = pnl.IntegratorMechanism() diff --git a/tests/mechanisms/test_mechanisms.py b/tests/mechanisms/test_mechanisms.py index 11c7cc485d..616a76d121 100644 --- a/tests/mechanisms/test_mechanisms.py +++ b/tests/mechanisms/test_mechanisms.py @@ -289,7 +289,7 @@ def test_reset_state_transfer_mechanism(self): np.testing.assert_allclose(original_output, [np.array([[0.5]]), np.array([[0.75]])]) np.testing.assert_allclose(output_after_reinitialization, [np.array([[0.875]]), np.array([[0.9375]])]) - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_integrator_function(self, comp_mode): """This test checks that the Mechanism.integrator_function is reset when the mechanism is""" diff --git a/tests/mechanisms/test_recurrent_transfer_mechanism.py b/tests/mechanisms/test_recurrent_transfer_mechanism.py index 0e7d2f851c..199677ab67 100644 --- a/tests/mechanisms/test_recurrent_transfer_mechanism.py +++ b/tests/mechanisms/test_recurrent_transfer_mechanism.py @@ -1110,9 +1110,9 @@ def my_fct(x): [np.array([0.5]), np.array([0.9375])], [np.array([0.5]), np.array([0.96875])]]), ], ids=lambda x: str(x) if isinstance(x, pnl.Condition) else "") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, expected): I1 = pnl.RecurrentTransferMechanism(integrator_mode=True, integration_rate=0.5) @@ -1145,9 +1145,9 @@ def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, ids=["initializers1", "NO initializers1"]) @pytest.mark.parametrize('has_initializers1', [True, False], ids=["initializers2", "NO initializers2"]) - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_stateful_function_when_has_initializers_composition(self, comp_mode, cond0, cond1, expected, has_initializers1, has_initializers2): I1 = pnl.RecurrentTransferMechanism(integrator_mode=True, @@ -1182,9 +1182,9 @@ def test_reset_stateful_function_when_has_initializers_composition(self, comp_mo (True, [[[0.96875]], [[0.9990234375]]]), # The 5th and the 10th iteration (False, [[[0.5]], [[0.75]]]), # The first and the second iteration ], ids=['until_finished', 'oneshot']) - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_max_executions_before_finished(self, comp_mode, until_finished, expected): I1 = pnl.RecurrentTransferMechanism(integrator_mode=True, integration_rate=0.5, diff --git a/tests/mechanisms/test_transfer_mechanism.py b/tests/mechanisms/test_transfer_mechanism.py index 6e63307602..a3c0d19814 100644 --- a/tests/mechanisms/test_transfer_mechanism.py +++ b/tests/mechanisms/test_transfer_mechanism.py @@ -1650,9 +1650,9 @@ def test_reset_spec(self): @pytest.mark.transfer_mechanism @pytest.mark.benchmark(group="TransferMechanism") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_termination_measures(self, comp_mode): stim_input = ProcessingMechanism(input_shapes=2, name='Stim Input') stim_percept = TransferMechanism(name='Stimulus', input_shapes=2, function=Logistic) diff --git a/tests/ports/test_output_ports.py b/tests/ports/test_output_ports.py index 397d77b23c..5c200e86a7 100644 --- a/tests/ports/test_output_ports.py +++ b/tests/ports/test_output_ports.py @@ -44,7 +44,7 @@ def test_output_port_variable_spec(self, mech_mode): (("num_executions", pnl.TimeScale.PASS), [1], [1]), (("num_executions", pnl.TimeScale.TIME_STEP), [1], [1]), ], ids=lambda x: str(x) if len(x) != 1 else '') - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def tests_output_port_variable_spec_composition(self, comp_mode, spec, expected1, expected2): if (len(spec) == 2) and (spec[1] == pnl.TimeScale.RUN) and \ ((comp_mode & pnl.ExecutionMode._Exec) == pnl.ExecutionMode._Exec): diff --git a/tests/scheduling/test_condition.py b/tests/scheduling/test_condition.py index 5196fd46f9..f1b9d73f3e 100644 --- a/tests/scheduling/test_condition.py +++ b/tests/scheduling/test_condition.py @@ -728,7 +728,7 @@ def test_AllHaveRun_2(self): ] ) @pytest.mark.parametrize('threshold', [10, 10.0]) - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_Threshold_parameters( self, parameter, indices, default_variable, integration_rate, expected_results, threshold, comp_mode ): @@ -761,7 +761,7 @@ def test_Threshold_parameters( ('!=', -1, 0, [[[-1]]]), ] ) - @pytest.mark.usefixtures("comp_mode_no_llvm") # Per-node mode doesn't support Parameter access in conditions + @pytest.mark.usefixtures("comp_mode_no_per_node") # Per-node mode doesn't support Parameter access in conditions def test_Threshold_comparators( self, comparator, increment, threshold, expected_results, comp_mode ): @@ -794,7 +794,7 @@ def test_Threshold_comparators( ('!=', -1, -1, 0, 1, [[[-3]]]), ] ) - @pytest.mark.usefixtures("comp_mode_no_llvm") # Per-node mode doesn't support Parameter access in conditions + @pytest.mark.usefixtures("comp_mode_no_per_node") # Per-node mode doesn't support Parameter access in conditions def test_Threshold_tolerances( self, comparator, increment, threshold, atol, rtol, expected_results, comp_mode ): diff --git a/tests/scheduling/test_scheduler.py b/tests/scheduling/test_scheduler.py index 07106d6e5e..f7ca28a7f1 100644 --- a/tests/scheduling/test_scheduler.py +++ b/tests/scheduling/test_scheduler.py @@ -1544,9 +1544,9 @@ def test_inline_control_mechanism_example(self): (TimeScale.TRIAL, [[1.5], [0.4375]]), (TimeScale.RUN, [[1.5], [0.4375]])], ids=lambda x: x if isinstance(x, TimeScale) else "") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_time_termination_measures(self, comp_mode, timescale, expected): in_one_pass = timescale in {TimeScale.TIME_STEP, TimeScale.PASS} attention = pnl.TransferMechanism(name='Attention', From dbfa7b394f0e30d50f2218ac5750acc8a7082a25 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Fri, 31 Jan 2025 15:55:14 -0500 Subject: [PATCH 7/9] llvm/ExecutionMode: Add is_*_compiled helper functions Use them instead of the COMPILED mask. Signed-off-by: Jan Vesely --- psyneulink/core/compositions/composition.py | 37 ++++++++++++------- psyneulink/core/llvm/__init__.py | 36 ++++++++++++++---- .../library/compositions/compositionrunner.py | 8 ++-- tests/composition/test_learning.py | 2 +- tests/models/test_greedy_agent.py | 2 +- 5 files changed, 59 insertions(+), 26 deletions(-) diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 0aa77a8352..913a529da1 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -11431,9 +11431,9 @@ def run( try: comp_ex_tags = frozenset({"learning"}) if self._is_learning(context) else frozenset() _comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags) - if execution_mode & pnlvm.ExecutionMode.LLVM: + if execution_mode.is_cpu_compiled(): results += _comp_ex.run(inputs, num_trials, num_inputs_sets) - elif execution_mode & pnlvm.ExecutionMode._PTX: + elif execution_mode.is_gpu_compiled(): results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets) else: assert False, "Unknown execution mode: {}".format(execution_mode) @@ -11836,15 +11836,16 @@ def _execute_controller(self, context=context, node=self.controller) - if self.controller and not execution_mode & pnlvm.ExecutionMode.COMPILED: + if self.controller and not execution_mode.is_compiled(): context.execution_phase = ContextFlags.PROCESSING self.controller.execute(context=context) else: assert execution_mode & pnlvm.ExecutionMode._PerNode - assert execution_mode & pnlvm.ExecutionMode.LLVM, \ + assert execution_mode.is_cpu_compiled(), \ f"PROGRAM ERROR: Unrecognized compiled execution_mode: '{execution_mode}'." + _comp_ex.freeze_values() _comp_ex.execute_node(self.controller) @@ -12059,9 +12060,9 @@ def execute( # Try compiled execution (if compiled execution was requested) # NOTE: This should be as high up as possible, # but still after the context has been initialized - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): - assert execution_mode & pnlvm.ExecutionMode.LLVM, "Unsupported execution mode: {}".format(execution_mode) + assert execution_mode.is_cpu_compiled(), "Unsupported execution mode: {}".format(execution_mode) is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode) @@ -12161,12 +12162,13 @@ def execute( inputs = self._validate_execution_inputs(inputs) build_CIM_input = self._build_variable_for_input_CIM(inputs) - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): # FIXME: parameter_CIM should be executed here as well, # but node execution of nested compositions with # outside control is not supported yet. assert not self.is_nested or len(self.parameter_CIM.afferents) == 0 assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() _comp_ex.execute_node(self.input_CIM, inputs) @@ -12367,8 +12369,10 @@ def execute( # This ensures that the order in which nodes execute does not affect the results of this timestep frozen_values = {} new_values = {} - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + _comp_ex.freeze_values() # PURGE LEARNING IF NOT ENABLED ---------------------------------------------------------------- @@ -12451,8 +12455,10 @@ def execute( context.replace_flag(ContextFlags.PROCESSING, ContextFlags.LEARNING) # Execute Mechanism - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + _comp_ex.execute_node(node) else: @@ -12480,8 +12486,9 @@ def execute( elif isinstance(node, Composition): - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() # Invoking nested composition passes data via Python # structures. Make sure all sources get their latest values @@ -12515,16 +12522,18 @@ def execute( # Run node-level compiled nested composition # only if there are no control projections - if execution_mode & pnlvm.ExecutionMode.LLVM and len(node.parameter_CIM.afferents) != 0: + if execution_mode.is_compiled() and len(node.parameter_CIM.afferents) != 0: nested_execution_mode = pnlvm.ExecutionMode.Python else: nested_execution_mode = execution_mode + ret = node.execute(context=context, execution_mode=nested_execution_mode) # Get output info from nested execution - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() # Update result in binary data structure _comp_ex.insert_node_output(node, ret) @@ -12668,8 +12677,10 @@ def execute( context=context) # Extract result here - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + _comp_ex.freeze_values() _comp_ex.execute_node(self.output_CIM) report(self, diff --git a/psyneulink/core/llvm/__init__.py b/psyneulink/core/llvm/__init__.py index 95a22fcc4f..85d3027363 100644 --- a/psyneulink/core/llvm/__init__.py +++ b/psyneulink/core/llvm/__init__.py @@ -74,21 +74,41 @@ class ExecutionMode(enum.Flag): compile and run multiple `TRIAL `\\s using CUDA for GPU. """ - Python = 0 - PyTorch = enum.auto() - LLVM = enum.auto() + Python = 0 + PyTorch = enum.auto() + _LLVM = enum.auto() _PTX = enum.auto() _PerNode = enum.auto() _Exec = enum.auto() _Run = enum.auto() _Fallback = enum.auto() - Auto = _Fallback | _Run | _Exec | LLVM - LLVMRun = LLVM | _Run - _LLVMExec = LLVM | _Exec - _LLVMPerNode = LLVM | _PerNode + Auto = _Fallback | _Run | _Exec | _LLVM PTXRun = _PTX | _Run - COMPILED = ~ (Python | PyTorch) + LLVMRun = _LLVM | _Run + _LLVMExec = _LLVM | _Exec + _LLVMPerNode = _LLVM | _PerNode + + def is_cpu_compiled(self): + is_cpu_compiled = self & self._LLVM + + # assert that only on of CPU and GPU compiled modes is enabled + if is_cpu_compiled: + assert not self & self._PTX + + return is_cpu_compiled + + def is_gpu_compiled(self): + is_gpu_compiled = self & self._PTX + + # assert that only on of CPU and GPU compiled modes is enabled + if is_gpu_compiled: + assert not self & self._LLVM + + return is_gpu_compiled + + def is_compiled(self): + return self.is_cpu_compiled() or self.is_gpu_compiled() _binary_generation = 0 diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index 4d45ccc6d0..e1f9faf784 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -238,11 +238,13 @@ def run_learning(self, Outputs from the final execution """ - if not (execution_mode & ExecutionMode.COMPILED): - self._is_llvm_mode = False - else: + if execution_mode.is_compiled(): + assert execution_mode.is_cpu_compiled() self._is_llvm_mode = True + else: + self._is_llvm_mode = False + if execution_mode is ExecutionMode.Python and learning_rate is not None: # User learning_rate specified in call to learn, so use that by passing it in runtime_params, # excluding any LearningMechanisms for which learning_rate has been individually specified diff --git a/tests/composition/test_learning.py b/tests/composition/test_learning.py index 9c22e58f55..2a64ca2a8c 100644 --- a/tests/composition/test_learning.py +++ b/tests/composition/test_learning.py @@ -510,7 +510,7 @@ def test_indepedence_of_learning_pathways_using_same_mechs_in_different_comps(se # Use explicit parametrize instead of the autodiff_mode fixture to avoid # applying marks. This test doesn't execute pytorch or compiled mode - @pytest.mark.parametrize('execution_mode', [pnl.ExecutionMode.LLVM, pnl.ExecutionMode.PyTorch]) + @pytest.mark.parametrize('execution_mode', [pnl.ExecutionMode.LLVMRun, pnl.ExecutionMode.PyTorch]) def test_execution_mode_pytorch_and_LLVM_errors(self, execution_mode): A = TransferMechanism(name="learning-process-mech-A") B = TransferMechanism(name="learning-process-mech-B") diff --git a/tests/models/test_greedy_agent.py b/tests/models/test_greedy_agent.py index 89452373e6..3796f38fbb 100644 --- a/tests/models/test_greedy_agent.py +++ b/tests/models/test_greedy_agent.py @@ -103,7 +103,7 @@ def test_simplified_greedy_agent_random(benchmark, comp_mode): def test_predator_prey(benchmark, mode, ocm_mode, prng, samples, fp_type): # Skip large test instances that are not CPU compiled, or executed in parallel. - if len(samples) > 10 and not (mode & pnl.ExecutionMode.LLVM) and ocm_mode not in {'LLVM', 'PTX'}: + if len(samples) > 10 and not mode.is_compiled() and ocm_mode not in {'LLVM', 'PTX'}: pytest.skip("This test takes too long") # Instantiate LLVMBuilderContext using the preferred fp type From f1f8d46c9179a53407df13dfd48b469b3fd5b7bc Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Fri, 31 Jan 2025 16:42:31 -0500 Subject: [PATCH 8/9] llvm, Composition: Don't execute Python output_CIM in _LLVMPerNode mode Move compiled execution of output_CIM to the same place as Python execution. Do not report end of trial twice in _LLVMPerNode execution mode. Signed-off-by: Jan Vesely --- psyneulink/core/compositions/composition.py | 33 +++++++++++---------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 913a529da1..d359cc4792 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -12527,8 +12527,7 @@ def execute( else: nested_execution_mode = execution_mode - ret = node.execute(context=context, - execution_mode=nested_execution_mode) + ret = node.execute(context=context, execution_mode=nested_execution_mode) # Get output info from nested execution if execution_mode.is_compiled(): @@ -12632,7 +12631,17 @@ def execute( # Reset context flags context.execution_phase = ContextFlags.PROCESSING - self.output_CIM.execute(context=context) + + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + + _comp_ex.freeze_values() + _comp_ex.execute_node(self.output_CIM) + + else: + self.output_CIM.execute(context=context) + context.execution_phase = ContextFlags.IDLE # Animate output_CIM @@ -12676,25 +12685,19 @@ def execute( content='execute_end', context=context) + # UPDATE TIME and RETURN *********************************************************************************** + + execution_scheduler.get_clock(context)._increment_time(TimeScale.TRIAL) + # Extract result here if execution_mode.is_compiled(): assert execution_mode & pnlvm.ExecutionMode._PerNode assert execution_mode.is_cpu_compiled() - _comp_ex.freeze_values() - _comp_ex.execute_node(self.output_CIM) - report(self, - PROGRESS_REPORT, - report_num=report_num, - content='trial_end', - context=context) return _comp_ex.extract_node_output(self.output_CIM) - # UPDATE TIME and RETURN *********************************************************************************** - - execution_scheduler.get_clock(context)._increment_time(TimeScale.TRIAL) - - return self.get_output_values(context) + else: + return self.get_output_values(context) def __call__(self, *args, **kwargs): """Execute Composition if any args are provided; else simply return results of last execution. From f2523c786a7fc999ba04ee2d0c17a405ce4c3665 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Fri, 31 Jan 2025 18:15:47 -0500 Subject: [PATCH 9/9] llvm/ExecutionMode: Use identity comparisons Enum members are singletons. Signed-off-by: Jan Vesely --- psyneulink/library/compositions/autodiffcomposition.py | 4 ++-- psyneulink/library/compositions/compositionrunner.py | 4 ++-- tests/composition/test_autodiffcomposition.py | 5 +++-- tests/composition/test_composition.py | 2 +- tests/composition/test_control.py | 8 ++++---- tests/models/test_greedy_agent.py | 4 ++-- tests/ports/test_output_ports.py | 3 +-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 2909fee763..a00e1c19fb 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -949,7 +949,7 @@ def create_pathway(node)->list: if node not in self.get_nodes_by_role(NodeRole.TARGET) for pathway in _get_pytorch_backprop_pathway(node)] - if execution_mode == pnlvm.ExecutionMode.PyTorch: + if execution_mode is pnlvm.ExecutionMode.PyTorch: # For PyTorch mode, only need to construct dummy TARGET Nodes, to allow targets to be: # - specified in the same way as for other execution_modes # - trial-by-trial values kept aligned with inputs in batch / minibatch construction @@ -1073,7 +1073,7 @@ def autodiff_forward(self, inputs, targets, before the next time it calls run(), in a call to backward() by do_gradient_optimization() in _batch_inputs() or _batch_function_inputs(), """ - assert execution_mode == pnlvm.ExecutionMode.PyTorch + assert execution_mode is pnlvm.ExecutionMode.PyTorch pytorch_rep = self.parameters.pytorch_representation._get(context) # --------- Do forward computation on current inputs ------------------------------------------------- diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index e1f9faf784..2468467c07 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -359,7 +359,7 @@ def run_learning(self, **kwargs) skip_initialization = True - if execution_mode == ExecutionMode.PyTorch: + if execution_mode is ExecutionMode.PyTorch: pytorch_rep = (self._composition.parameters.pytorch_representation._get(context). copy_weights_to_psyneulink(context)) if pytorch_rep and synch_with_pnl_options[MATRIX_WEIGHTS] == MINIBATCH: @@ -372,7 +372,7 @@ def run_learning(self, self._composition.parameters.results.get(context)[-1 * num_epoch_results:], context) # return result of last *trial* (as usual for a call to run) - if execution_mode == ExecutionMode.PyTorch and synch_with_pnl_options[MATRIX_WEIGHTS] == EPOCH: + if execution_mode is ExecutionMode.PyTorch and synch_with_pnl_options[MATRIX_WEIGHTS] == EPOCH: # Copy weights at end of learning run pytorch_rep.copy_weights_to_psyneulink(context) diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 4b8c5255d3..5d61836e37 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -2907,12 +2907,13 @@ def test_optimizer_specs(self, learning_rate, weight_decay, optimizer_type, expe # fp32 results are different due to rounding if pytest.helpers.llvm_current_fp_precision() == 'fp32' and \ - autodiff_mode != pnl.ExecutionMode.PyTorch and \ + autodiff_mode is not pnl.ExecutionMode.PyTorch and \ optimizer_type == 'sgd' and \ learning_rate == 10: expected = [[[0.9918830394744873]], [[0.9982172846794128]], [[0.9978305697441101]], [[0.9994590878486633]]] + # FIXME: LLVM version is broken with learning rate == 1.5 - if learning_rate != 1.5 or autodiff_mode == pnl.ExecutionMode.PyTorch: + if learning_rate != 1.5 or autodiff_mode is pnl.ExecutionMode.PyTorch: np.testing.assert_allclose(results, expected) diff --git a/tests/composition/test_composition.py b/tests/composition/test_composition.py index 65df4cd2a4..d1d404d725 100644 --- a/tests/composition/test_composition.py +++ b/tests/composition/test_composition.py @@ -4274,7 +4274,7 @@ def test_one_time_warning_for_run_with_no_inputs(self): comp.run() def _check_comp_ex(self, comp, comparison, comp_mode, struct_name, context=None, is_not=False): - if comp_mode == pnl.ExecutionMode.Python: + if comp_mode is pnl.ExecutionMode.Python: return if context is None: diff --git a/tests/composition/test_control.py b/tests/composition/test_control.py index d3f070d729..a3e5a76bcf 100644 --- a/tests/composition/test_control.py +++ b/tests/composition/test_control.py @@ -2469,7 +2469,7 @@ def test_modulation_simple(self, cost, expected, exp_values, comp_mode): ret = comp.run(inputs={mech: [2]}, num_trials=1, execution_mode=comp_mode) np.testing.assert_allclose(ret, expected) - if comp_mode == pnl.ExecutionMode.Python: + if comp_mode is pnl.ExecutionMode.Python: np.testing.assert_allclose(comp.controller.function.saved_values.flatten(), exp_values) @pytest.mark.benchmark @@ -2531,7 +2531,7 @@ def test_modulation_of_random_state_DDM(self, comp_mode, benchmark, prng): benchmark(comp.run, inputs={ctl_mech:seeds, mech:5.0}, num_trials=len(seeds) * 2, execution_mode=comp_mode) # Python uses fp64 irrespective of the pytest precision setting - precision = 'fp64' if comp_mode == pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() + precision = 'fp64' if comp_mode is pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() if prng == 'Default': np.testing.assert_allclose(np.squeeze(comp.results[:len(seeds) * 2]), [[100, 21], [100, 23], [100, 20]] * 2) elif prng == 'Philox' and precision == 'fp64': @@ -2644,7 +2644,7 @@ def test_modulation_of_random_state_DDM_Analytical(self, comp_mode, benchmark, p benchmark(comp.run, inputs={ctl_mech:seeds, mech:0.1}, num_trials=len(seeds) * 2, execution_mode=comp_mode) # Python uses fp64 irrespective of the pytest precision setting - precision = 'fp64' if comp_mode == pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() + precision = 'fp64' if comp_mode is pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() if prng == 'Default': np.testing.assert_allclose(np.squeeze(comp.results[:len(seeds) * 2]), [[-1, 3.99948962], [1, 3.99948962], [-1, 3.99948962]] * 2) elif prng == 'Philox' and precision == 'fp64': @@ -3359,7 +3359,7 @@ def comp_run(inputs, execution_mode): results, saved_values = benchmark(comp_run, inputs, mode) np.testing.assert_array_equal(results, result) - if mode == pnl.ExecutionMode.Python: + if mode is pnl.ExecutionMode.Python: np.testing.assert_array_equal(saved_values.flatten(), [0.75, 1.5, 2.25]) def test_model_based_ocm_with_buffer(self): diff --git a/tests/models/test_greedy_agent.py b/tests/models/test_greedy_agent.py index 3796f38fbb..ae9de598ec 100644 --- a/tests/models/test_greedy_agent.py +++ b/tests/models/test_greedy_agent.py @@ -214,7 +214,7 @@ def action_fn(variable): # np.testing.assert_allclose(run_results, [[0.9705216285127504, -0.1343332460369043]]) np.testing.assert_allclose(run_results, [[0.9705216285127504, -0.1343332460369043]], atol=1e-6, rtol=1e-6) elif prng == 'Philox': - if mode == pnl.ExecutionMode.Python or pytest.helpers.llvm_current_fp_precision() == 'fp64': + if mode is pnl.ExecutionMode.Python or pytest.helpers.llvm_current_fp_precision() == 'fp64': # np.testing.assert_allclose(run_results[0], [[-0.16882940384606543, -0.07280074899749223]]) np.testing.assert_allclose(run_results, [[-0.16882940384606543, -0.07280074899749223]]) elif pytest.helpers.llvm_current_fp_precision() == 'fp32': @@ -225,7 +225,7 @@ def action_fn(variable): else: assert False, "Unknown PRNG!" - if mode == pnl.ExecutionMode.Python and not benchmark.enabled: + if mode is pnl.ExecutionMode.Python and not benchmark.enabled: # FIXME: The results are 'close' for both Philox and MT, # because they're dominated by costs # FIX: Requires 1e-5 tolerance diff --git a/tests/ports/test_output_ports.py b/tests/ports/test_output_ports.py index 5c200e86a7..d70b0ea702 100644 --- a/tests/ports/test_output_ports.py +++ b/tests/ports/test_output_ports.py @@ -46,8 +46,7 @@ def test_output_port_variable_spec(self, mech_mode): ], ids=lambda x: str(x) if len(x) != 1 else '') @pytest.mark.usefixtures("comp_mode_no_per_node") def tests_output_port_variable_spec_composition(self, comp_mode, spec, expected1, expected2): - if (len(spec) == 2) and (spec[1] == pnl.TimeScale.RUN) and \ - ((comp_mode & pnl.ExecutionMode._Exec) == pnl.ExecutionMode._Exec): + if (len(spec) == 2) and (spec[1] == pnl.TimeScale.RUN) and (comp_mode & pnl.ExecutionMode._Exec): pytest.skip("{} is not supported in {}".format(spec[1], comp_mode)) # Test specification of OutputPort's variable