llvm: Refactor ExecutionMode handling (#3182)
Make most compiled modes private for test/development use only.
Introduce a new _LLVMPerNode execution mode.
Do not fall back to per-node mode in automatic fallback.
Use ExecutionMode helper methods rather than the COMPILED mask to determine compiled mode (see the sketch after the file summary below).
jvesely authored Feb 1, 2025
2 parents d12866f + f2523c7 commit 30c89d5
Showing 19 changed files with 188 additions and 156 deletions.
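A minimal sketch of the helper-method dispatch this commit adopts in place of masking with `ExecutionMode.COMPILED`. The helper names (`is_compiled`, `is_cpu_compiled`, `is_gpu_compiled`) and the `pnlvm` alias are taken from the diff below; the `dispatch` wrapper and its return strings are purely illustrative:

from psyneulink.core import llvm as pnlvm

def dispatch(execution_mode):
    # New style: ask the mode what it is instead of testing
    # `execution_mode & pnlvm.ExecutionMode.COMPILED`.
    if execution_mode.is_compiled():
        if execution_mode.is_cpu_compiled():
            return "compiled CPU (LLVM) path"
        if execution_mode.is_gpu_compiled():
            return "compiled GPU (PTX) path"
        raise AssertionError("Unknown compiled execution mode: {}".format(execution_mode))
    return "Python interpreter path"

print(dispatch(pnlvm.ExecutionMode.LLVMRun))  # compiled CPU (LLVM) path
print(dispatch(pnlvm.ExecutionMode.Python))   # Python interpreter path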
13 changes: 6 additions & 7 deletions conftest.py
@@ -71,8 +71,7 @@ def pytest_runtest_setup(item):
def pytest_generate_tests(metafunc):
mech_and_func_modes = ['Python',
pytest.param('LLVM', marks=pytest.mark.llvm),
pytest.param('PTX', marks=[pytest.mark.llvm,
pytest.mark.cuda])
pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda])
]

if "func_mode" in metafunc.fixturenames:
@@ -81,9 +80,9 @@ def pytest_generate_tests(metafunc):
if "mech_mode" in metafunc.fixturenames:
metafunc.parametrize("mech_mode", mech_and_func_modes)

if "comp_mode_no_llvm" in metafunc.fixturenames:
if "comp_mode_no_per_node" in metafunc.fixturenames:
modes = [m for m in get_comp_execution_modes()
if m.values[0] is not pnlvm.ExecutionMode.LLVM]
if m.values[0] is not pnlvm.ExecutionMode._LLVMPerNode]
metafunc.parametrize("comp_mode", modes)

elif "comp_mode" in metafunc.fixturenames:
@@ -151,7 +150,7 @@ def pytest_runtest_teardown(item):
pnlvm.cleanup("llvm" in item.keywords and not skip_cleanup_check)

@pytest.fixture
def comp_mode_no_llvm():
def comp_mode_no_per_node():
# dummy fixture to allow 'comp_mode' filtering
pass

@@ -187,8 +186,8 @@ def llvm_current_fp_precision():
@pytest.helpers.register
def get_comp_execution_modes():
return [pytest.param(pnlvm.ExecutionMode.Python),
pytest.param(pnlvm.ExecutionMode.LLVM, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode.LLVMExec, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode._LLVMExec, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm),
pytest.param(pnlvm.ExecutionMode.PTXRun, marks=[pytest.mark.llvm, pytest.mark.cuda])
]
4 changes: 2 additions & 2 deletions docs/source/Compilation.rst
@@ -33,8 +33,8 @@ Use
Compiled form of a model can be invoked by passing one of the following values to the `bin_execute` parameter of `Composition.run`, or `Composition.exec`:

* `ExecutionMode.Python`: Normal python execution
* `ExecutionMode.LLVM`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly.
* `ExecutionMode.LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised.
* `ExecutionMode._LLVMPerNode`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly.
* `ExecutionMode._LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised.
* `ExecutionMode.LLVMRun`: Execution of `Composition.run` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised.
* `ExecutionMode.Auto`: This option attempts all three above mentioned granularities, and gracefully falls back to lower granularity. Warnings are raised in place of errors. This is the recommended way to invoke compiled execution as the final fallback is the Python baseline.
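
A minimal usage sketch of the recommended `ExecutionMode.Auto` path (hypothetical one-mechanism model; the keyword is shown as `execution_mode`, the name used by `Composition.run` in `composition.py` below, while the paragraph above calls the parameter `bin_execute`):

import psyneulink as pnl

A = pnl.TransferMechanism(name='A')
comp = pnl.Composition(pathways=[A], name='comp')

# Auto tries the compiled run- and trial-level forms first and gracefully
# falls back to the Python baseline; compilation failures become warnings.
results = comp.run(inputs={A: [[1.0], [2.0]]},
                   execution_mode=pnl.ExecutionMode.Auto)
print(results)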

150 changes: 82 additions & 68 deletions psyneulink/core/compositions/composition.py
@@ -1122,7 +1122,7 @@
an AutodffComposition to be run in any mode (e.g., for comparison and/or compatibility purposes).

.. warning::
* `ExecutionMode.LLVM` and `ExecutionMode.PyTorch` can only be used in the `learn <AutodiffComposition.learn>`
* `ExecutionMode.LLVMRun` and `ExecutionMode.PyTorch` can only be used in the `learn <AutodiffComposition.learn>`
method of an `AutodiffComposition`; specifying them in the `learn <Composition.learn>`()` method of a standard
`Composition` causes an error.

@@ -2060,21 +2060,22 @@ def input_function(env, result):

.. _Composition_Compilation_Modes:

* *True* -- try to use the one that yields the greatesst improvement, progressively reverting to less powerful
but more forgiving modes, in the order listed below, for each that fails;
* *True* -- try to use the one that yields the greatest improvement, progressively reverting to less powerful
but more forgiving modes, trying LLVMRun, _LLVMExec, and Python.

* `ExecutionMode.LLVMRun` - compile and run multiple `TRIAL <TimeScale.TRIAL>`\\s; if successful,
the compiled binary is semantically equivalent to the execution of the `run <Composition.run>` method
using the Python interpreter;

* `ExecutionMode.LLVMExec` -- compile and run each `TRIAL <TimeScale.TRIAL>`, using the Python interpreter
* `ExecutionMode._LLVMExec` -- compile and run each `TRIAL <TimeScale.TRIAL>`, using the Python interpreter
to iterate over them; if successful, the compiled binary for each `TRIAL <TimeScale.TRIAL>` is semantically
equivalent the execution of the `execute <Composition.execute>` method using the Python interpreter;
This mode does not support Trial scope scheduling rules and should not be used outside of development or testing.

* `ExecutionMode.LLVM` -- compile and run `Node <Composition_Nodes>` of the `Composition` and their `Projections
* `ExecutionMode._LLVMPerNode` -- compile and run `Node <Composition_Nodes>` of the `Composition` and their `Projections
<Projection>`, using the Python interpreter to call the Composition's `scheduler <Composition.scheduler>`,
execute each Node and iterate over `TRIAL <TimeScale.TRIAL>`\\s; note that, in this mode, scheduling
`Conditions <Condition>` that rely on Node `Parameters` is not supported;
`Conditions <Condition>` that rely on Node `Parameters` are not supported;

* `ExecutionMode.Python` (same as *False*; the default) -- use the Python interpreter to execute the `Composition`.

@@ -2087,7 +2088,7 @@ def input_function(env, result):
using it with a standard `Composition` is possible, but it will **not** have the expected effect of
executing its `learn <Composition.learn>` method using PyTorch.

* `ExecutionMode.PTXrun` -- compile multiple `TRIAL <TimeScale.TRIAL>`\\s for execution on GPU
* `ExecutionMode.PTXRun` -- compile multiple `TRIAL <TimeScale.TRIAL>`\\s for execution on GPU
(see `below <Composition_Compilation_PTX>` for additional details).
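
An illustrative sketch of the whole-run compilation described above (hypothetical two-mechanism composition; unlike the *True* fallback path, `ExecutionMode.LLVMRun` raises an error if the `Composition` fails to compile):

import psyneulink as pnl

A = pnl.TransferMechanism(name='A')
B = pnl.TransferMechanism(name='B')
comp = pnl.Composition(pathways=[[A, B]], name='comp')

# Compile the entire `run` loop; the compiled binary is semantically
# equivalent to running the same trials through the Python interpreter.
results = comp.run(inputs={A: [[1.0], [2.0], [3.0]]},
                   execution_mode=pnl.ExecutionMode.LLVMRun)
print(results)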

.. _Composition_Compilation_PyTorch:
@@ -11430,9 +11431,9 @@ def run(
try:
comp_ex_tags = frozenset({"learning"}) if self._is_learning(context) else frozenset()
_comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags)
if execution_mode & pnlvm.ExecutionMode.LLVM:
if execution_mode.is_cpu_compiled():
results += _comp_ex.run(inputs, num_trials, num_inputs_sets)
elif execution_mode & pnlvm.ExecutionMode.PTX:
elif execution_mode.is_gpu_compiled():
results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets)
else:
assert False, "Unknown execution mode: {}".format(execution_mode)
@@ -11835,14 +11836,16 @@ def _execute_controller(self,
context=context,
node=self.controller)

if self.controller and not execution_mode & pnlvm.ExecutionMode.COMPILED:
if self.controller and not execution_mode.is_compiled():

context.execution_phase = ContextFlags.PROCESSING
self.controller.execute(context=context)

else:
assert (execution_mode == pnlvm.ExecutionMode.LLVM
or execution_mode & pnlvm.ExecutionMode._Fallback),\
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled(), \
f"PROGRAM ERROR: Unrecognized compiled execution_mode: '{execution_mode}'."

_comp_ex.freeze_values()
_comp_ex.execute_node(self.controller)

@@ -12054,30 +12057,27 @@ def execute(
self._initialize_from_context(context, base_context, override=False)
context.composition = self

# Run compiled execution (if compiled execution was requested
# Try compiled execution (if compiled execution was requested)
# NOTE: This should be as high up as possible,
# but still after the context has been initialized
if execution_mode & pnlvm.ExecutionMode.COMPILED:
is_simulation = (context is not None and
ContextFlags.SIMULATION_MODE in context.runmode)
# Try running in Exec mode first
if (execution_mode & pnlvm.ExecutionMode._Exec):
# There's no mode to execute simulations.
if execution_mode.is_compiled():

assert execution_mode.is_cpu_compiled(), "Unsupported execution mode: {}".format(execution_mode)

is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode)

_comp_ex = pnlvm.CompExecution.get(self, context)

if execution_mode & pnlvm.ExecutionMode._Exec:
# There's no mode to execute compiled simulations.
# Simulations are run as part of the controller node wrapper.
assert not is_simulation

try:
llvm_inputs = self._validate_execution_inputs(inputs)
_comp_ex = pnlvm.CompExecution.get(self, context)
if execution_mode & pnlvm.ExecutionMode.LLVM:
_comp_ex.execute(llvm_inputs)
else:
assert False, "Unknown execution mode: {}".format(execution_mode)
_comp_ex.execute(llvm_inputs)

report(self,
PROGRESS_REPORT,
report_num=report_num,
content='trial_end',
context=context)
report(self, PROGRESS_REPORT, report_num=report_num, content='trial_end', context=context)

self._propagate_most_recent_context(context)
return _comp_ex.extract_node_output(self.output_CIM)
@@ -12086,27 +12086,18 @@ def execute(
if not execution_mode & pnlvm.ExecutionMode._Fallback:
raise e from None

warnings.warn("Failed to execute `{}': {}".format(self.name, str(e)))
warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(self.name, self.name, str(e)))
execution_mode = pnlvm.ExecutionMode.Python

# Exec failed for some reason, we can still try node level execution_mode
# Filter out nested compositions. They are not executed in this mode
# Filter out controller if running simulation.
mechanisms = (n for n in self._all_nodes
if isinstance(n, Mechanism) and
(n is not self.controller or not is_simulation))
elif execution_mode & pnlvm.ExecutionMode._PerNode:

assert execution_mode & pnlvm.ExecutionMode.LLVM
try:
_comp_ex = pnlvm.CompExecution.get(self, context)
# Compile all mechanism wrappers
for m in mechanisms:
_comp_ex._set_bin_node(m)
except Exception as e:
if not execution_mode & pnlvm.ExecutionMode._Fallback:
raise e from None
for m in self._all_nodes:
if isinstance(m, Mechanism) and not (m is self.controller and is_simulation):
_comp_ex._set_bin_node(m)

warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(m.name, self.name, str(e)))
execution_mode = pnlvm.ExecutionMode.Python
else:
assert False, "Unsupported execution mode: {}".format(execution_mode)


# Generate first frame of animation without any active_items
@@ -12171,12 +12162,15 @@ def execute(
inputs = self._validate_execution_inputs(inputs)
build_CIM_input = self._build_variable_for_input_CIM(inputs)

if execution_mode & pnlvm.ExecutionMode.COMPILED:
_comp_ex.execute_node(self.input_CIM, inputs)
if execution_mode.is_compiled():
# FIXME: parameter_CIM should be executed here as well,
# but node execution of nested compositions with
# outside control is not supported yet.
assert not self.is_nested or len(self.parameter_CIM.afferents) == 0
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.execute_node(self.input_CIM, inputs)

elif self.is_nested:
simulation = ContextFlags.SIMULATION_MODE in context.runmode
@@ -12375,7 +12369,10 @@ def execute(
# This ensures that the order in which nodes execute does not affect the results of this timestep
frozen_values = {}
new_values = {}
if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.freeze_values()

# PURGE LEARNING IF NOT ENABLED ----------------------------------------------------------------
@@ -12458,8 +12455,12 @@ def execute(
context.replace_flag(ContextFlags.PROCESSING, ContextFlags.LEARNING)

# Execute Mechanism
if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.execute_node(node)

else:
if node is not self.controller:
mech_context = copy(context)
@@ -12485,7 +12486,10 @@

elif isinstance(node, Composition):

if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

# Invoking nested composition passes data via Python
# structures. Make sure all sources get their latest values
srcs = (proj.sender.owner for proj in node.input_CIM.afferents)
@@ -12518,15 +12522,18 @@ def execute(

# Run node-level compiled nested composition
# only if there are no control projections
if execution_mode == pnlvm.ExecutionMode.LLVM and len(node.parameter_CIM.afferents) != 0:
if execution_mode.is_compiled() and len(node.parameter_CIM.afferents) != 0:
nested_execution_mode = pnlvm.ExecutionMode.Python
else:
nested_execution_mode = execution_mode
ret = node.execute(context=context,
execution_mode=nested_execution_mode)

ret = node.execute(context=context, execution_mode=nested_execution_mode)

# Get output info from nested execution
if execution_mode & pnlvm.ExecutionMode.COMPILED:
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

# Update result in binary data structure
_comp_ex.insert_node_output(node, ret)

@@ -12624,7 +12631,17 @@ def execute(

# Reset context flags
context.execution_phase = ContextFlags.PROCESSING
self.output_CIM.execute(context=context)

if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

_comp_ex.freeze_values()
_comp_ex.execute_node(self.output_CIM)

else:
self.output_CIM.execute(context=context)

context.execution_phase = ContextFlags.IDLE

# Animate output_CIM
@@ -12668,22 +12685,19 @@ def execute(
content='execute_end',
context=context)

# Extract result here
if execution_mode & pnlvm.ExecutionMode.COMPILED:
_comp_ex.freeze_values()
_comp_ex.execute_node(self.output_CIM)
report(self,
PROGRESS_REPORT,
report_num=report_num,
content='trial_end',
context=context)
return _comp_ex.extract_node_output(self.output_CIM)

# UPDATE TIME and RETURN ***********************************************************************************

execution_scheduler.get_clock(context)._increment_time(TimeScale.TRIAL)

return self.get_output_values(context)
# Extract result here
if execution_mode.is_compiled():
assert execution_mode & pnlvm.ExecutionMode._PerNode
assert execution_mode.is_cpu_compiled()

return _comp_ex.extract_node_output(self.output_CIM)

else:
return self.get_output_values(context)

def __call__(self, *args, **kwargs):
"""Execute Composition if any args are provided; else simply return results of last execution.