llvm: Refactor ExecutionMode handling (#3182)
Make most compiled modes private for test/development use only.
Introduce a new _LLVMPerNode execution mode.
Do not fall back to per-node mode in automatic fallback.
Use ExecutionMode helper methods rather than the COMPILED mask to determine compiled mode (see the sketch after the file summary below).
jvesely authored Feb 1, 2025
2 parents d12866f + f2523c7 commit 30c89d5
Showing 19 changed files with 188 additions and 156 deletions.
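A minimal sketch of the helper-method dispatch this commit adopts in place of masking with `ExecutionMode.COMPILED`. The helper names (`is_compiled`, `is_cpu_compiled`, `is_gpu_compiled`) and the `pnlvm` alias are taken from the diff below; the `dispatch` wrapper and its return strings are purely illustrative:

from psyneulink.core import llvm as pnlvm

def dispatch(execution_mode):
    # New style: ask the mode what it is instead of testing
    # `execution_mode & pnlvm.ExecutionMode.COMPILED`.
    if execution_mode.is_compiled():
        if execution_mode.is_cpu_compiled():
            return "compiled CPU (LLVM) path"
        if execution_mode.is_gpu_compiled():
            return "compiled GPU (PTX) path"
        raise AssertionError("Unknown compiled execution mode: {}".format(execution_mode))
    return "Python interpreter path"

print(dispatch(pnlvm.ExecutionMode.LLVMRun))  # compiled CPU (LLVM) path
print(dispatch(pnlvm.ExecutionMode.Python))   # Python interpreter path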
13 changes: 6 additions & 7 deletions conftest.py
@@ -71,8 +71,7 @@ def pytest_runtest_setup(item):
def pytest_generate_tests(metafunc):
mech_and_func_modes = ['Python',
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.param('PTX', marks=[pytest.mark.llvm,
pytest.mark.cuda])
pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda])
]

if "func_mode" in metafunc.fixturenames:
@@ -81,9 +80,9 @@ def pytest_generate_tests(metafunc):
if "mech_mode" in metafunc.fixturenames:
metafunc.parametrize("mech_mode", mech_and_func_modes)

if "comp_mode_no_llvm" in metafunc.fixturenames:
if "comp_mode_no_per_node" in metafunc.fixturenames:
modes = [m for m in get_comp_execution_modes()
if m.values[0] is not pnlvm.ExecutionMode.LLVM]
if m.values[0] is not pnlvm.ExecutionMode._LLVMPerNode]
metafunc.parametrize("comp_mode", modes)

elif "comp_mode" in metafunc.fixturenames:
@@ -151,7 +150,7 @@ def pytest_runtest_teardown(item):
pnlvm.cleanup("llvm" in item.keywords and not skip_cleanup_check)

@pytest.fixture
def comp_mode_no_llvm():
def comp_mode_no_per_node():
# dummy fixture to allow 'comp_mode' filtering
pass

@@ -187,8 +186,8 @@ def llvm_current_fp_precision():
@pytest.helpers.register
def get_comp_execution_modes():
return [pytest.param(pnlvm.ExecutionMode.Python),
pytest.param(pnlvm.ExecutionMode.LLVM, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode.LLVMExec, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode._LLVMExec, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode.PTXRun, marks=[pytest.mark.llvm, pytest.mark.cuda])
]
4 changes: 2 additions & 2 deletions docs/source/Compilation.rst
@@ -33,8 +33,8 @@ Use
Compiled form of a model can be invoked by passing one of the following values to the `bin_execute` parameter of `Composition.run`, or `Composition.exec`:

* `ExecutionMode.Python`: Normal python execution
* `ExecutionMode.LLVM`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly.
* `ExecutionMode.LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised.
* `ExecutionMode._LLVMPerNode`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly.
* `ExecutionMode._LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised.
* `ExecutionMode.LLVMRun`: Execution of `Composition.run` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised.
* `ExecutionMode.Auto`: This option attempts all three above mentioned granularities, and gracefully falls back to lower granularity. Warnings are raised in place of errors. This is the recommended way to invoke compiled execution as the final fallback is the Python baseline.
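
A minimal usage sketch of the recommended `ExecutionMode.Auto` path (hypothetical one-mechanism model; the keyword is shown as `execution_mode`, the name used by `Composition.run` in `composition.py` below, while the paragraph above calls the parameter `bin_execute`):

import psyneulink as pnl

A = pnl.TransferMechanism(name='A')
comp = pnl.Composition(pathways=[A], name='comp')

# Auto tries the compiled run- and trial-level forms first and gracefully
# falls back to the Python baseline; compilation failures become warnings.
results = comp.run(inputs={A: [[1.0], [2.0]]},
                   execution_mode=pnl.ExecutionMode.Auto)
print(results)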

150 changes: 82 additions & 68 deletions psyneulink/core/compositions/composition.py
@@ -1122,7 +1122,7 @@
an AutodffComposition to be run in any mode (e.g., for comparison and/or compatibility purposes).

.. warning::
* `ExecutionMode.LLVM` and `ExecutionMode.PyTorch` can only be used in the `learn <AutodiffComposition.learn>`
* `ExecutionMode.LLVMRun` and `ExecutionMode.PyTorch` can only be used in the `learn <AutodiffComposition.learn>`
method of an `AutodiffComposition`; specifying them in the `learn <Composition.learn>`()` method of a standard
`Composition` causes an error.

@@ -2060,21 +2060,22 @@ def input_function(env, result):

.. _Composition_Compilation_Modes:

* *True* -- try to use the one that yields the greatesst improvement, progressively reverting to less powerful
but more forgiving modes, in the order listed below, for each that fails;
* *True* -- try to use the one that yields the greatest improvement, progressively reverting to less powerful
but more forgiving modes, trying LLVMRun, _LLVMExec, and Python.

* `ExecutionMode.LLVMRun` - compile and run multiple `TRIAL <TimeScale.TRIAL>`\\s; if successful,
the compiled binary is semantically equivalent to the execution of the `run <Composition.run>` method
using the Python interpreter;

* `ExecutionMode.LLVMExec` -- compile and run each `TRIAL <TimeScale.TRIAL>`, using the Python interpreter
* `ExecutionMode._LLVMExec` -- compile and run each `TRIAL <TimeScale.TRIAL>`, using the Python interpreter
to iterate over them; if successful, the compiled binary for each `TRIAL <TimeScale.TRIAL>` is semantically
equivalent the execution of the `execute <Composition.execute>` method using the Python interpreter;
This mode does not support Trial scope scheduling rules and should not be used outside of development or testing.

* `ExecutionMode.LLVM` -- compile and run `Node <Composition_Nodes>` of the `Composition` and their `Projections
* `ExecutionMode._LLVMPerNode` -- compile and run `Node <Composition_Nodes>` of the `Composition` and their `Projections
<Projection>`, using the Python interpreter to call the Composition's `scheduler <Composition.scheduler>`,
execute each Node and iterate over `TRIAL <TimeScale.TRIAL>`\\s; note that, in this mode, scheduling
`Conditions <Condition>` that rely on Node `Parameters` is not supported;
`Conditions <Condition>` that rely on Node `Parameters` are not supported;

* `ExecutionMode.Python` (same as *False*; the default) -- use the Python interpreter to execute the `Composition`.

@@ -2087,7 +2088,7 @@ def input_function(env, result):
using it with a standard `Composition` is possible, but it will **not** have the expected effect of
executing its `learn <Composition.learn>` method using PyTorch.

* `ExecutionMode.PTXrun` -- compile multiple `TRIAL <TimeScale.TRIAL>`\\s for execution on GPU
* `ExecutionMode.PTXRun` -- compile multiple `TRIAL <TimeScale.TRIAL>`\\s for execution on GPU
(see `below <Composition_Compilation_PTX>` for additional details).
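
An illustrative sketch of the whole-run compilation described above (hypothetical two-mechanism composition; unlike the *True* fallback path, `ExecutionMode.LLVMRun` raises an error if the `Composition` fails to compile):

import psyneulink as pnl

A = pnl.TransferMechanism(name='A')
B = pnl.TransferMechanism(name='B')
comp = pnl.Composition(pathways=[[A, B]], name='comp')

# Compile the entire `run` loop; the compiled binary is semantically
# equivalent to running the same trials through the Python interpreter.
results = comp.run(inputs={A: [[1.0], [2.0], [3.0]]},
                   execution_mode=pnl.ExecutionMode.LLVMRun)
print(results)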

.. _Composition_Compilation_PyTorch:
@@ -11430,9 +11431,9 @@ def run(
try:
comp_ex_tags = frozenset({"learning"}) if self._is_learning(context) else frozenset()
_comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags)
if execution_mode & pnlvm.ExecutionMode.LLVM:
if execution_mode.is_cpu_compiled():
results += _comp_ex.run(inputs, num_trials, num_inputs_sets)
elif execution_mode & pnlvm.ExecutionMode.PTX:
elif execution_mode.is_gpu_compiled():
results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets)
else:
assert False, "Unknown execution mode: {}".format(execution_mode)
@@ -11835,14 +11836,16 @@ def _execute_controller(self,
context=context,
node=self.controller)

if self.controller and not execution_mode & pnlvm.ExecutionMode.COMPILED:
if self.controller and not execution_mode.is_compiled():

context.execution_phase = ContextFlags.PROCESSING
self.controller.execute(context=context)

else:
assert (execution_mode == pnlvm.ExecutionMode.LLVM
or execution_mode & pnlvm.ExecutionMode._Fallback),\
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled(), \
f"PROGRAM ERROR: Unrecognized compiled execution_mode: '{execution_mode}'."

_comp_ex.freeze_values()
_comp_ex.execute_node(self.controller)

@@ -12054,30 +12057,27 @@ def execute(
self._initialize_from_context(context, base_context, override=False)
context.composition = self

# Run compiled execution (if compiled execution was requested
# Try compiled execution (if compiled execution was requested)
# NOTE: This should be as high up as possible,
# but still after the context has been initialized
if execution_mode & pnlvm.ExecutionMode.COMPILED:
is_simulation = (context is not None and
ContextFlags.SIMULATION_MODE in context.runmode)
# Try running in Exec mode first
if (execution_mode & pnlvm.ExecutionMode._Exec):
# There's no mode to execute simulations.
if execution_mode.is_compiled():

assert execution_mode.is_cpu_compiled(), "Unsupported execution mode: {}".format(execution_mode)

is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode)

_comp_ex = pnlvm.CompExecution.get(self, context)

if execution_mode & pnlvm.ExecutionMode._Exec:
# There's no mode to execute compiled simulations.
# Simulations are run as part of the controller node wrapper.
assert not is_simulation

try:
llvm_inputs = self._validate_execution_inputs(inputs)
_comp_ex = pnlvm.CompExecution.get(self, context)
if execution_mode & pnlvm.ExecutionMode.LLVM:
_comp_ex.execute(llvm_inputs)
else:
assert False, "Unknown execution mode: {}".format(execution_mode)
_comp_ex.execute(llvm_inputs)

report(self,
PROGRESS_REPORT,
report_num=report_num,
content='trial_end',
context=context)
report(self, PROGRESS_REPORT, report_num=report_num, content='trial_end', context=context)

self._propagate_most_recent_context(context)
return _comp_ex.extract_node_output(self.output_CIM)
@@ -12086,27 +12086,18 @@ def execute(
if not execution_mode & pnlvm.ExecutionMode._Fallback:
raise e from None

warnings.warn("Failed to execute `{}': {}".format(self.name, str(e)))
warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(self.name, self.name, str(e)))
execution_mode = pnlvm.ExecutionMode.Python

# Exec failed for some reason, we can still try node level execution_mode
# Filter out nested compositions. They are not executed in this mode
# Filter out controller if running simulation.
mechanisms = (n for n in self._all_nodes
if isinstance(n, Mechanism) and
(n is not self.controller or not is_simulation))
elif execution_mode & pnlvm.ExecutionMode._PerNode:

assert execution_mode & pnlvm.ExecutionMode.LLVM
try:
_comp_ex = pnlvm.CompExecution.get(self, context)
# Compile all mechanism wrappers
for m in mechanisms:
_comp_ex._set_bin_node(m)
except Exception as e:
if not execution_mode & pnlvm.ExecutionMode._Fallback:
raise e from None
for m in self._all_nodes:
if isinstance(m, Mechanism) and not (m is self.controller and is_simulation):
_comp_ex._set_bin_node(m)

warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(m.name, self.name, str(e)))
execution_mode = pnlvm.ExecutionMode.Python
else:
assert False, "Unsupported execution mode: {}".format(execution_mode)


# Generate first frame of animation without any active_items
@@ -12171,12 +12162,15 @@ def execute(
inputs = self._validate_execution_inputs(inputs)
build_CIM_input = self._build_variable_for_input_CIM(inputs)

if execution_mode & pnlvm.ExecutionMode.COMPILED:
_comp_ex.execute_node(self.input_CIM, inputs)
if execution_mode.is_compiled():
# FIXME: parameter_CIM should be executed here as well,
# but node execution of nested compositions with
# outside control is not supported yet.
assert not self.is_nested or len(self.parameter_CIM.afferents) == 0
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.execute_node(self.input_CIM, inputs)

elif self.is_nested:
simulation = ContextFlags.SIMULATION_MODE in context.runmode
@@ -12375,7 +12369,10 @@ def execute(
# This ensures that the order in which nodes execute does not affect the results of this timestep
frozen_values = {}
new_values = {}
if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.freeze_values()

# PURGE LEARNING IF NOT ENABLED ----------------------------------------------------------------
@@ -12458,8 +12455,12 @@ def execute(
context.replace_flag(ContextFlags.PROCESSING, ContextFlags.LEARNING)

# Execute Mechanism
if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.execute_node(node)

else:
if node is not self.controller:
mech_context = copy(context)
@@ -12485,7 +12486,10 @@

elif isinstance(node, Composition):

if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

# Invoking nested composition passes data via Python
# structures. Make sure all sources get their latest values
srcs = (proj.sender.owner for proj in node.input_CIM.afferents)
@@ -12518,15 +12522,18 @@ def execute(

# Run node-level compiled nested composition
# only if there are no control projections
if execution_mode == pnlvm.ExecutionMode.LLVM and len(node.parameter_CIM.afferents) != 0:
if execution_mode.is_compiled() and len(node.parameter_CIM.afferents) != 0:
nested_execution_mode = pnlvm.ExecutionMode.Python
else:
nested_execution_mode = execution_mode
ret = node.execute(context=context,
execution_mode=nested_execution_mode)

ret = node.execute(context=context, execution_mode=nested_execution_mode)

# Get output info from nested execution
if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

# Update result in binary data structure
_comp_ex.insert_node_output(node, ret)

@@ -12624,7 +12631,17 @@ def execute(

# Reset context flags
context.execution_phase = ContextFlags.PROCESSING
self.output_CIM.execute(context=context)

if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.freeze_values()
_comp_ex.execute_node(self.output_CIM)

else:
self.output_CIM.execute(context=context)

context.execution_phase = ContextFlags.IDLE

# Animate output_CIM
@@ -12668,22 +12685,19 @@ def execute(
content='execute_end',
context=context)

# Extract result here
if execution_mode & pnlvm.ExecutionMode.COMPILED:
_comp_ex.freeze_values()
_comp_ex.execute_node(self.output_CIM)
report(self,
PROGRESS_REPORT,
report_num=report_num,
content='trial_end',
context=context)
return _comp_ex.extract_node_output(self.output_CIM)

# UPDATE TIME and RETURN ***********************************************************************************

execution_scheduler.get_clock(context)._increment_time(TimeScale.TRIAL)

return self.get_output_values(context)
# Extract result here
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

return _comp_ex.extract_node_output(self.output_CIM)

else:
return self.get_output_values(context)

def __call__(self, *args, **kwargs):
"""Execute Composition if any args are provided; else simply return results of last execution.