diff --git a/conftest.py b/conftest.py index 2761218504..6ee8bac6fa 100644 --- a/conftest.py +++ b/conftest.py @@ -71,8 +71,7 @@ def pytest_runtest_setup(item): def pytest_generate_tests(metafunc): mech_and_func_modes = ['Python', pytest.param('LLVM', marks=pytest.mark.llvm), - pytest.param('PTX', marks=[pytest.mark.llvm, - pytest.mark.cuda]) + pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda]) ] if "func_mode" in metafunc.fixturenames: @@ -81,9 +80,9 @@ def pytest_generate_tests(metafunc): if "mech_mode" in metafunc.fixturenames: metafunc.parametrize("mech_mode", mech_and_func_modes) - if "comp_mode_no_llvm" in metafunc.fixturenames: + if "comp_mode_no_per_node" in metafunc.fixturenames: modes = [m for m in get_comp_execution_modes() - if m.values[0] is not pnlvm.ExecutionMode.LLVM] + if m.values[0] is not pnlvm.ExecutionMode._LLVMPerNode] metafunc.parametrize("comp_mode", modes) elif "comp_mode" in metafunc.fixturenames: @@ -151,7 +150,7 @@ def pytest_runtest_teardown(item): pnlvm.cleanup("llvm" in item.keywords and not skip_cleanup_check) @pytest.fixture -def comp_mode_no_llvm(): +def comp_mode_no_per_node(): # dummy fixture to allow 'comp_mode' filtering pass @@ -187,8 +186,8 @@ def llvm_current_fp_precision(): @pytest.helpers.register def get_comp_execution_modes(): return [pytest.param(pnlvm.ExecutionMode.Python), - pytest.param(pnlvm.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnlvm.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), + pytest.param(pnlvm.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm), + pytest.param(pnlvm.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.PTXRun, marks=[pytest.mark.llvm, pytest.mark.cuda]) ] diff --git a/docs/source/Compilation.rst b/docs/source/Compilation.rst index ce9d9e3847..239c743038 100644 --- a/docs/source/Compilation.rst +++ b/docs/source/Compilation.rst @@ -33,8 +33,8 @@ Use Compiled form of a model can be invoked by passing one of the following values to the `bin_execute` parameter of `Composition.run`, or `Composition.exec`: * `ExecutionMode.Python`: Normal python execution - * `ExecutionMode.LLVM`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly. - * `ExecutionMode.LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. + * `ExecutionMode._LLVMPerNode`: Compile and execute individual nodes. The scheduling loop still runs in Python. If any of the nodes fails to compile, an error is raised. *NOTE:* Schedules that require access to node data will not work correctly. + * `ExecutionMode._LLVMExec`: Execution of `Composition.exec` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. * `ExecutionMode.LLVMRun`: Execution of `Composition.run` is replaced by a compiled equivalent. If the `Composition` fails to compile, an error is raised. * `ExecutionMode.Auto`: This option attempts all three above mentioned granularities, and gracefully falls back to lower granularity. Warnings are raised in place of errors. This is the recommended way to invoke compiled execution as the final fallback is the Python baseline. 
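(For illustration, a minimal sketch of invoking the modes described in the Compilation docs above. The mechanism and input values are hypothetical; the keyword used here is `execution_mode`, the spelling used by `Composition.run` and the tests in this diff, while `bin_execute` above is the older parameter name.)

```python
import psyneulink as pnl

A = pnl.ProcessingMechanism(name='A')
comp = pnl.Composition(pathways=[A])

# ExecutionMode.Auto tries LLVMRun first, then _LLVMExec, and finally falls
# back to the Python baseline, issuing warnings instead of errors on failure.
results = comp.run(inputs={A: [[1.0]]}, execution_mode=pnl.ExecutionMode.Auto)
```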
diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 5f4b24dfc2..d359cc4792 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -1122,7 +1122,7 @@ an AutodiffComposition to be run in any mode (e.g., for comparison and/or compatibility purposes). .. warning:: - * `ExecutionMode.LLVM` and `ExecutionMode.PyTorch` can only be used in the `learn ` + * `ExecutionMode.LLVMRun` and `ExecutionMode.PyTorch` can only be used in the `learn ` method of an `AutodiffComposition`; specifying them in the `learn `() method of a standard `Composition` causes an error. @@ -2060,21 +2060,22 @@ def input_function(env, result): .. _Composition_Compilation_Modes: - * *True* -- try to use the one that yields the greatesst improvement, progressively reverting to less powerful - but more forgiving modes, in the order listed below, for each that fails; + * *True* -- try to use the one that yields the greatest improvement, progressively reverting to less powerful + but more forgiving modes, trying LLVMRun, _LLVMExec, and Python, in that order. * `ExecutionMode.LLVMRun` -- compile and run multiple `TRIAL `\\s; if successful, the compiled binary is semantically equivalent to the execution of the `run ` method using the Python interpreter; - * `ExecutionMode.LLVMExec` -- compile and run each `TRIAL `, using the Python interpreter + * `ExecutionMode._LLVMExec` -- compile and run each `TRIAL `, using the Python interpreter to iterate over them; if successful, the compiled binary for each `TRIAL ` is semantically equivalent to the execution of the `execute ` method using the Python interpreter; + This mode does not support Trial-scope scheduling rules and should not be used outside of development or testing. - * `ExecutionMode.LLVM` -- compile and run `Node ` of the `Composition` and their `Projections + * `ExecutionMode._LLVMPerNode` -- compile and run each `Node ` of the `Composition` and their `Projections `, using the Python interpreter to call the Composition's `scheduler `, execute each Node and iterate over `TRIAL `\\s; note that, in this mode, scheduling - `Conditions ` that rely on Node `Parameters` is not supported; + `Conditions ` that rely on Node `Parameters` are not supported; * `ExecutionMode.Python` (same as *False*; the default) -- use the Python interpreter to execute the `Composition`. @@ -2087,7 +2088,7 @@ def input_function(env, result): using it with a standard `Composition` is possible, but it will **not** have the expected effect of executing its `learn ` method using PyTorch. - * `ExecutionMode.PTXrun` -- compile multiple `TRIAL `\\s for execution on GPU + * `ExecutionMode.PTXRun` -- compile multiple `TRIAL `\\s for execution on GPU (see `below ` for additional details). ..
_Composition_Compilation_PyTorch: @@ -11430,9 +11431,9 @@ def run( try: comp_ex_tags = frozenset({"learning"}) if self._is_learning(context) else frozenset() _comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags) - if execution_mode & pnlvm.ExecutionMode.LLVM: + if execution_mode.is_cpu_compiled(): results += _comp_ex.run(inputs, num_trials, num_inputs_sets) - elif execution_mode & pnlvm.ExecutionMode.PTX: + elif execution_mode.is_gpu_compiled(): results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets) else: assert False, "Unknown execution mode: {}".format(execution_mode) @@ -11835,14 +11836,16 @@ def _execute_controller(self, context=context, node=self.controller) - if self.controller and not execution_mode & pnlvm.ExecutionMode.COMPILED: + if self.controller and not execution_mode.is_compiled(): + context.execution_phase = ContextFlags.PROCESSING self.controller.execute(context=context) else: - assert (execution_mode == pnlvm.ExecutionMode.LLVM - or execution_mode & pnlvm.ExecutionMode._Fallback),\ + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled(), \ f"PROGRAM ERROR: Unrecognized compiled execution_mode: '{execution_mode}'." + _comp_ex.freeze_values() _comp_ex.execute_node(self.controller) @@ -12054,30 +12057,27 @@ def execute( self._initialize_from_context(context, base_context, override=False) context.composition = self - # Run compiled execution (if compiled execution was requested + # Try compiled execution (if compiled execution was requested) # NOTE: This should be as high up as possible, # but still after the context has been initialized - if execution_mode & pnlvm.ExecutionMode.COMPILED: - is_simulation = (context is not None and - ContextFlags.SIMULATION_MODE in context.runmode) - # Try running in Exec mode first - if (execution_mode & pnlvm.ExecutionMode._Exec): - # There's no mode to execute simulations. + if execution_mode.is_compiled(): + + assert execution_mode.is_cpu_compiled(), "Unsupported execution mode: {}".format(execution_mode) + + is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode) + + _comp_ex = pnlvm.CompExecution.get(self, context) + + if execution_mode & pnlvm.ExecutionMode._Exec: + # There's no mode to execute compiled simulations. # Simulations are run as part of the controller node wrapper. assert not is_simulation + try: llvm_inputs = self._validate_execution_inputs(inputs) - _comp_ex = pnlvm.CompExecution.get(self, context) - if execution_mode & pnlvm.ExecutionMode.LLVM: - _comp_ex.execute(llvm_inputs) - else: - assert False, "Unknown execution mode: {}".format(execution_mode) + _comp_ex.execute(llvm_inputs) - report(self, - PROGRESS_REPORT, - report_num=report_num, - content='trial_end', - context=context) + report(self, PROGRESS_REPORT, report_num=report_num, content='trial_end', context=context) self._propagate_most_recent_context(context) return _comp_ex.extract_node_output(self.output_CIM) @@ -12086,27 +12086,18 @@ def execute( if not execution_mode & pnlvm.ExecutionMode._Fallback: raise e from None - warnings.warn("Failed to execute `{}': {}".format(self.name, str(e))) + warnings.warn("Failed to compile or execute `{}': {}".format(self.name, str(e))) + execution_mode = pnlvm.ExecutionMode.Python - # Exec failed for some reason, we can still try node level execution_mode - # Filter out nested compositions. They are not executed in this mode - # Filter out controller if running simulation.
- mechanisms = (n for n in self._all_nodes - if isinstance(n, Mechanism) and - (n is not self.controller or not is_simulation)) + elif execution_mode & pnlvm.ExecutionMode._PerNode: - assert execution_mode & pnlvm.ExecutionMode.LLVM - try: - _comp_ex = pnlvm.CompExecution.get(self, context) # Compile all mechanism wrappers - for m in mechanisms: - _comp_ex._set_bin_node(m) - except Exception as e: - if not execution_mode & pnlvm.ExecutionMode._Fallback: - raise e from None + for m in self._all_nodes: + if isinstance(m, Mechanism) and not (m is self.controller and is_simulation): + _comp_ex._set_bin_node(m) - warnings.warn("Failed to compile wrapper for `{}' in `{}': {}".format(m.name, self.name, str(e))) - execution_mode = pnlvm.ExecutionMode.Python + else: + assert False, "Unsupported execution mode: {}".format(execution_mode) # Generate first frame of animation without any active_items @@ -12171,12 +12162,15 @@ def execute( inputs = self._validate_execution_inputs(inputs) build_CIM_input = self._build_variable_for_input_CIM(inputs) - if execution_mode & pnlvm.ExecutionMode.COMPILED: - _comp_ex.execute_node(self.input_CIM, inputs) + if execution_mode.is_compiled(): # FIXME: parameter_CIM should be executed here as well, # but node execution of nested compositions with # outside control is not supported yet. assert not self.is_nested or len(self.parameter_CIM.afferents) == 0 + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + + _comp_ex.execute_node(self.input_CIM, inputs) elif self.is_nested: simulation = ContextFlags.SIMULATION_MODE in context.runmode @@ -12375,7 +12369,10 @@ def execute( # This ensures that the order in which nodes execute does not affect the results of this timestep frozen_values = {} new_values = {} - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + _comp_ex.freeze_values() # PURGE LEARNING IF NOT ENABLED ---------------------------------------------------------------- @@ -12458,8 +12455,12 @@ def execute( context.replace_flag(ContextFlags.PROCESSING, ContextFlags.LEARNING) # Execute Mechanism - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + _comp_ex.execute_node(node) + else: if node is not self.controller: mech_context = copy(context) @@ -12485,7 +12486,10 @@ def execute( elif isinstance(node, Composition): - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + # Invoking nested composition passes data via Python # structures. 
Make sure all sources get their latest values srcs = (proj.sender.owner for proj in node.input_CIM.afferents) @@ -12518,15 +12522,18 @@ def execute( # Run node-level compiled nested composition # only if there are no control projections - if execution_mode == pnlvm.ExecutionMode.LLVM and len(node.parameter_CIM.afferents) != 0: + if execution_mode.is_compiled() and len(node.parameter_CIM.afferents) != 0: nested_execution_mode = pnlvm.ExecutionMode.Python else: nested_execution_mode = execution_mode - ret = node.execute(context=context, - execution_mode=nested_execution_mode) + + ret = node.execute(context=context, execution_mode=nested_execution_mode) # Get output info from nested execution - if execution_mode & pnlvm.ExecutionMode.COMPILED: + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + # Update result in binary data structure _comp_ex.insert_node_output(node, ret) @@ -12624,7 +12631,17 @@ def execute( # Reset context flags context.execution_phase = ContextFlags.PROCESSING - self.output_CIM.execute(context=context) + + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + + _comp_ex.freeze_values() + _comp_ex.execute_node(self.output_CIM) + + else: + self.output_CIM.execute(context=context) + context.execution_phase = ContextFlags.IDLE # Animate output_CIM @@ -12668,22 +12685,19 @@ def execute( content='execute_end', context=context) - # Extract result here - if execution_mode & pnlvm.ExecutionMode.COMPILED: - _comp_ex.freeze_values() - _comp_ex.execute_node(self.output_CIM) - report(self, - PROGRESS_REPORT, - report_num=report_num, - content='trial_end', - context=context) - return _comp_ex.extract_node_output(self.output_CIM) - # UPDATE TIME and RETURN *********************************************************************************** execution_scheduler.get_clock(context)._increment_time(TimeScale.TRIAL) - return self.get_output_values(context) + # Extract result here + if execution_mode.is_compiled(): + assert execution_mode & pnlvm.ExecutionMode._PerNode + assert execution_mode.is_cpu_compiled() + + return _comp_ex.extract_node_output(self.output_CIM) + + else: + return self.get_output_values(context) def __call__(self, *args, **kwargs): """Execute Composition if any args are provided; else simply return results of last execution. diff --git a/psyneulink/core/llvm/__init__.py b/psyneulink/core/llvm/__init__.py index f46508e71c..85d3027363 100644 --- a/psyneulink/core/llvm/__init__.py +++ b/psyneulink/core/llvm/__init__.py @@ -49,7 +49,7 @@ class ExecutionMode(enum.Flag): - LLVM + _LLVMPerNode compile and run Composition `Nodes ` and `Projections ` individually. - LLVMExec + _LLVMExec compile and run each `TRIAL ` individually. LLVMRun @@ -74,19 +74,41 @@ class ExecutionMode(enum.Flag): compile and run multiple `TRIAL `\\s using CUDA for GPU.
""" - Python = 0 - PyTorch = enum.auto() - LLVM = enum.auto() - PTX = enum.auto() - _Run = enum.auto() + Python = 0 + PyTorch = enum.auto() + _LLVM = enum.auto() + _PTX = enum.auto() + _PerNode = enum.auto() _Exec = enum.auto() + _Run = enum.auto() _Fallback = enum.auto() - Auto = _Fallback | _Run | _Exec | LLVM - LLVMRun = LLVM | _Run - LLVMExec = LLVM | _Exec - PTXRun = PTX | _Run - COMPILED = ~ (Python | PyTorch) + Auto = _Fallback | _Run | _Exec | _LLVM + PTXRun = _PTX | _Run + LLVMRun = _LLVM | _Run + _LLVMExec = _LLVM | _Exec + _LLVMPerNode = _LLVM | _PerNode + + def is_cpu_compiled(self): + is_cpu_compiled = self & self._LLVM + + # assert that only on of CPU and GPU compiled modes is enabled + if is_cpu_compiled: + assert not self & self._PTX + + return is_cpu_compiled + + def is_gpu_compiled(self): + is_gpu_compiled = self & self._PTX + + # assert that only on of CPU and GPU compiled modes is enabled + if is_gpu_compiled: + assert not self & self._LLVM + + return is_gpu_compiled + + def is_compiled(self): + return self.is_cpu_compiled() or self.is_gpu_compiled() _binary_generation = 0 diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 893fb1b17e..a00e1c19fb 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -184,7 +184,7 @@ during execution (see `AutodiffComposition_Nested_Modulation` below), which is not supported by PyTorch. .. warning:: - * Specifying `ExecutionMode.LLVM` or `ExecutionMode.PyTorch` in the learn() method of a standard + * Specifying `ExecutionMode.LLVMRun` or `ExecutionMode.PyTorch` in the learn() method of a standard `Composition` causes an error. COMMENT: @@ -204,7 +204,7 @@ `Compilation Modes ` for more information about executing a Composition in compiled mode. .. note:: - Specifying `ExecutionMode.LLVMRUn` in either the `learn ` and `run ` + Specifying `ExecutionMode.LLVMRun` in either the `learn ` and `run ` methods of an AutodiffComposition causes it to (attempt to) use compiled execution in both cases; this is because LLVM compilation supports the use of modulation in PsyNeuLink models (as compared to `PyTorch mode `; see `note ` below). 
@@ -949,7 +949,7 @@ def create_pathway(node)->list: if node not in self.get_nodes_by_role(NodeRole.TARGET) for pathway in _get_pytorch_backprop_pathway(node)] - if execution_mode == pnlvm.ExecutionMode.PyTorch: + if execution_mode is pnlvm.ExecutionMode.PyTorch: # For PyTorch mode, only need to construct dummy TARGET Nodes, to allow targets to be: # - specified in the same way as for other execution_modes # - trial-by-trial values kept aligned with inputs in batch / minibatch construction @@ -1073,7 +1073,7 @@ def autodiff_forward(self, inputs, targets, before the next time it calls run(), in a call to backward() by do_gradient_optimization() in _batch_inputs() or _batch_function_inputs(), """ - assert execution_mode == pnlvm.ExecutionMode.PyTorch + assert execution_mode is pnlvm.ExecutionMode.PyTorch pytorch_rep = self.parameters.pytorch_representation._get(context) # --------- Do forward computation on current inputs ------------------------------------------------- diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index 4d45ccc6d0..2468467c07 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -238,11 +238,13 @@ def run_learning(self, Outputs from the final execution """ - if not (execution_mode & ExecutionMode.COMPILED): - self._is_llvm_mode = False - else: + if execution_mode.is_compiled(): + assert execution_mode.is_cpu_compiled() self._is_llvm_mode = True + else: + self._is_llvm_mode = False + if execution_mode is ExecutionMode.Python and learning_rate is not None: # User learning_rate specified in call to learn, so use that by passing it in runtime_params, # excluding any LearningMechanisms for which learning_rate has been individually specified @@ -357,7 +359,7 @@ def run_learning(self, **kwargs) skip_initialization = True - if execution_mode == ExecutionMode.PyTorch: + if execution_mode is ExecutionMode.PyTorch: pytorch_rep = (self._composition.parameters.pytorch_representation._get(context). 
copy_weights_to_psyneulink(context)) if pytorch_rep and synch_with_pnl_options[MATRIX_WEIGHTS] == MINIBATCH: @@ -370,7 +372,7 @@ def run_learning(self, self._composition.parameters.results.get(context)[-1 * num_epoch_results:], context) # return result of last *trial* (as usual for a call to run) - if execution_mode == ExecutionMode.PyTorch and synch_with_pnl_options[MATRIX_WEIGHTS] == EPOCH: + if execution_mode is ExecutionMode.PyTorch and synch_with_pnl_options[MATRIX_WEIGHTS] == EPOCH: # Copy weights at end of learning run pytorch_rep.copy_weights_to_psyneulink(context) diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 4b8c5255d3..5d61836e37 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -2907,12 +2907,13 @@ def test_optimizer_specs(self, learning_rate, weight_decay, optimizer_type, expe # fp32 results are different due to rounding if pytest.helpers.llvm_current_fp_precision() == 'fp32' and \ - autodiff_mode != pnl.ExecutionMode.PyTorch and \ + autodiff_mode is not pnl.ExecutionMode.PyTorch and \ optimizer_type == 'sgd' and \ learning_rate == 10: expected = [[[0.9918830394744873]], [[0.9982172846794128]], [[0.9978305697441101]], [[0.9994590878486633]]] + # FIXME: LLVM version is broken with learning rate == 1.5 - if learning_rate != 1.5 or autodiff_mode == pnl.ExecutionMode.PyTorch: + if learning_rate != 1.5 or autodiff_mode is pnl.ExecutionMode.PyTorch: np.testing.assert_allclose(results, expected) diff --git a/tests/composition/test_composition.py b/tests/composition/test_composition.py index 15fb27a135..d1d404d725 100644 --- a/tests/composition/test_composition.py +++ b/tests/composition/test_composition.py @@ -3764,8 +3764,8 @@ def test_run_2_mechanisms_double_trial_specs(self, comp_mode): np.testing.assert_allclose(np.array([[75.]]), output) @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), ]) def test_execute_composition(self, mode): comp = Composition() @@ -3864,8 +3864,8 @@ def test_LPP_wrong_component(self): and "that is in deferred init" in str(error_text.value)) @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMPerNode, marks=pytest.mark.llvm), + pytest.param(pnl.ExecutionMode._LLVMExec, marks=pytest.mark.llvm), ]) def test_execute_no_inputs(self, mode): m_inner = ProcessingMechanism(input_shapes=2) @@ -4274,7 +4274,7 @@ def test_one_time_warning_for_run_with_no_inputs(self): comp.run() def _check_comp_ex(self, comp, comparison, comp_mode, struct_name, context=None, is_not=False): - if comp_mode == pnl.ExecutionMode.Python: + if comp_mode is pnl.ExecutionMode.Python: return if context is None: @@ -4423,8 +4423,8 @@ def test_multiple_runs_with_parameter_change_from_data_struct(self, comp_mode): self._check_comp_ex(comp, None, comp_mode, struct_name, is_not=True) self._check_comp_ex(comp, orig_comp_ex, comp_mode, struct_name, is_not=True) - @pytest.mark.usefixtures("comp_mode_no_llvm") - @pytest.mark.parametrize("comp_mode2", [m for m in pytest.helpers.get_comp_execution_modes() if m.values[0] is 
not pnl.ExecutionMode.LLVM]) + @pytest.mark.usefixtures("comp_mode_no_per_node") + @pytest.mark.parametrize("comp_mode2", [m for m in pytest.helpers.get_comp_execution_modes() if m.values[0] is not pnl.ExecutionMode._LLVMPerNode]) def test_execution_after_cleanup_enum_param(self, comp_mode, comp_mode2): """ This test checks that compiled sync works for Parameters with Enum values. @@ -6607,8 +6607,8 @@ class TestProperties: @pytest.mark.composition @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Auto, pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=[_fallback_xfail, pytest.mark.llvm]), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=[_fallback_xfail, pytest.mark.llvm]), + pytest.param(pnl.ExecutionMode._LLVMPerNode, marks=[_fallback_xfail, pytest.mark.llvm]), + pytest.param(pnl.ExecutionMode._LLVMExec, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode.LLVMRun, marks=[_fallback_xfail, pytest.mark.llvm]), pytest.param(pnl.ExecutionMode.PTXRun, marks=[_fallback_xfail, pytest.mark.llvm, pytest.mark.cuda]), ]) diff --git a/tests/composition/test_control.py b/tests/composition/test_control.py index b473953d9f..a3e5a76bcf 100644 --- a/tests/composition/test_control.py +++ b/tests/composition/test_control.py @@ -2469,7 +2469,7 @@ def test_modulation_simple(self, cost, expected, exp_values, comp_mode): ret = comp.run(inputs={mech: [2]}, num_trials=1, execution_mode=comp_mode) np.testing.assert_allclose(ret, expected) - if comp_mode == pnl.ExecutionMode.Python: + if comp_mode is pnl.ExecutionMode.Python: np.testing.assert_allclose(comp.controller.function.saved_values.flatten(), exp_values) @pytest.mark.benchmark @@ -2507,9 +2507,9 @@ def get_val(s, dty): @pytest.mark.benchmark @pytest.mark.control @pytest.mark.composition - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") @pytest.mark.parametrize('prng', ['Default', 'Philox']) def test_modulation_of_random_state_DDM(self, comp_mode, benchmark, prng): # set explicit seed to make sure modulation is different @@ -2531,7 +2531,7 @@ def test_modulation_of_random_state_DDM(self, comp_mode, benchmark, prng): benchmark(comp.run, inputs={ctl_mech:seeds, mech:5.0}, num_trials=len(seeds) * 2, execution_mode=comp_mode) # Python uses fp64 irrespective of the pytest precision setting - precision = 'fp64' if comp_mode == pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() + precision = 'fp64' if comp_mode is pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() if prng == 'Default': np.testing.assert_allclose(np.squeeze(comp.results[:len(seeds) * 2]), [[100, 21], [100, 23], [100, 20]] * 2) elif prng == 'Philox' and precision == 'fp64': @@ -2622,9 +2622,9 @@ def test_modulation_of_initializer_nested(self, ocm_mode): @pytest.mark.benchmark @pytest.mark.control @pytest.mark.composition - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. 
- @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") @pytest.mark.parametrize('prng', ['Default', 'Philox']) def test_modulation_of_random_state_DDM_Analytical(self, comp_mode, benchmark, prng): # set explicit seed to make sure modulation is different @@ -2644,7 +2644,7 @@ def test_modulation_of_random_state_DDM_Analytical(self, comp_mode, benchmark, p benchmark(comp.run, inputs={ctl_mech:seeds, mech:0.1}, num_trials=len(seeds) * 2, execution_mode=comp_mode) # Python uses fp64 irrespective of the pytest precision setting - precision = 'fp64' if comp_mode == pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() + precision = 'fp64' if comp_mode is pnl.ExecutionMode.Python else pytest.helpers.llvm_current_fp_precision() if prng == 'Default': np.testing.assert_allclose(np.squeeze(comp.results[:len(seeds) * 2]), [[-1, 3.99948962], [1, 3.99948962], [-1, 3.99948962]] * 2) elif prng == 'Philox' and precision == 'fp64': @@ -3359,7 +3359,7 @@ def comp_run(inputs, execution_mode): results, saved_values = benchmark(comp_run, inputs, mode) np.testing.assert_array_equal(results, result) - if mode == pnl.ExecutionMode.Python: + if mode is pnl.ExecutionMode.Python: np.testing.assert_array_equal(saved_values.flatten(), [0.75, 1.5, 2.25]) def test_model_based_ocm_with_buffer(self): diff --git a/tests/composition/test_learning.py b/tests/composition/test_learning.py index 9c22e58f55..2a64ca2a8c 100644 --- a/tests/composition/test_learning.py +++ b/tests/composition/test_learning.py @@ -510,7 +510,7 @@ def test_indepedence_of_learning_pathways_using_same_mechs_in_different_comps(se # Use explicit parametrize instead of the autodiff_mode fixture to avoid # applying marks. This test doesn't execute pytorch or compiled mode - @pytest.mark.parametrize('execution_mode', [pnl.ExecutionMode.LLVM, pnl.ExecutionMode.PyTorch]) + @pytest.mark.parametrize('execution_mode', [pnl.ExecutionMode.LLVMRun, pnl.ExecutionMode.PyTorch]) def test_execution_mode_pytorch_and_LLVM_errors(self, execution_mode): A = TransferMechanism(name="learning-process-mech-A") B = TransferMechanism(name="learning-process-mech-B") diff --git a/tests/mechanisms/test_ddm_mechanism.py b/tests/mechanisms/test_ddm_mechanism.py index 50c1d2eef7..5ed8cea5f5 100644 --- a/tests/mechanisms/test_ddm_mechanism.py +++ b/tests/mechanisms/test_ddm_mechanism.py @@ -693,9 +693,9 @@ def test_DDM_threshold_modulation_integrator(comp_mode): (100.0, 100.0, [[100.0], [76.0]]), ]) # 3/5/2021 - DDM' default behaviour now requires resetting stateful -# functions after each trial. This is not supported in LLVM execution mode. +# functions after each trial. This is not supported in _LLVMPerNode execution mode. # See: https://github.com/PrincetonUniversity/PsyNeuLink/issues/1935 -@pytest.mark.usefixtures("comp_mode_no_llvm") +@pytest.mark.usefixtures("comp_mode_no_per_node") def test_ddm_is_finished(comp_mode, noise, threshold, expected_results): comp = Composition() @@ -711,11 +711,11 @@ def test_ddm_is_finished(comp_mode, noise, threshold, expected_results): @pytest.mark.parametrize("until_finished", ["until_finished", "not_until_finished"]) @pytest.mark.parametrize("threshold_mod", ["threshold_modulated", "threshold_not_modulated"]) # 3/5/2021 - DDM' default behaviour now requires resetting stateful -# functions after each trial. This is not supported in LLVM execution mode. +# functions after each trial. This is not supported in _LLVMPerNode execution mode. 
# See: https://github.com/PrincetonUniversity/PsyNeuLink/issues/1935 # Moreover, evaluating scheduler conditions in Python is not supported # for compiled execution -@pytest.mark.usefixtures("comp_mode_no_llvm") +@pytest.mark.usefixtures("comp_mode_no_per_node") def test_ddm_is_finished_with_dependency(comp_mode, until_finished, threshold_mod): comp = Composition() @@ -801,9 +801,9 @@ def test_sequence_of_DDM_mechs_in_Composition_Pathway(): @pytest.mark.composition @pytest.mark.ddm_mechanism # 3/5/2021 - DDM' default behaviour now requires resetting stateful -# functions after each trial. This is not supported in LLVM execution mode. +# functions after each trial. This is not supported in _LLVMPerNode execution mode. # See: https://github.com/PrincetonUniversity/PsyNeuLink/issues/1935 -@pytest.mark.usefixtures("comp_mode_no_llvm") +@pytest.mark.usefixtures("comp_mode_no_per_node") def test_DDMMechanism_LCA_equivalent(comp_mode): ddm = DDM(default_variable=[0], diff --git a/tests/mechanisms/test_integrator_mechanism.py b/tests/mechanisms/test_integrator_mechanism.py index 414c6837f1..e7f616b452 100644 --- a/tests/mechanisms/test_integrator_mechanism.py +++ b/tests/mechanisms/test_integrator_mechanism.py @@ -1233,9 +1233,9 @@ def test_has_initializers(self): [np.array([0.5]), np.array([0.9375])], [np.array([0.5]), np.array([0.96875])]]), ], ids=lambda x: str(x) if isinstance(x, pnl.Condition) else "") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, expected): I1 = pnl.IntegratorMechanism() I2 = pnl.IntegratorMechanism() diff --git a/tests/mechanisms/test_mechanisms.py b/tests/mechanisms/test_mechanisms.py index 11c7cc485d..616a76d121 100644 --- a/tests/mechanisms/test_mechanisms.py +++ b/tests/mechanisms/test_mechanisms.py @@ -289,7 +289,7 @@ def test_reset_state_transfer_mechanism(self): np.testing.assert_allclose(original_output, [np.array([[0.5]]), np.array([[0.75]])]) np.testing.assert_allclose(output_after_reinitialization, [np.array([[0.875]]), np.array([[0.9375]])]) - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_integrator_function(self, comp_mode): """This test checks that the Mechanism.integrator_function is reset when the mechanism is""" diff --git a/tests/mechanisms/test_recurrent_transfer_mechanism.py b/tests/mechanisms/test_recurrent_transfer_mechanism.py index 7a60d775c9..199677ab67 100644 --- a/tests/mechanisms/test_recurrent_transfer_mechanism.py +++ b/tests/mechanisms/test_recurrent_transfer_mechanism.py @@ -1110,9 +1110,9 @@ def my_fct(x): [np.array([0.5]), np.array([0.9375])], [np.array([0.5]), np.array([0.96875])]]), ], ids=lambda x: str(x) if isinstance(x, pnl.Condition) else "") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. 
- @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, expected): I1 = pnl.RecurrentTransferMechanism(integrator_mode=True, integration_rate=0.5) @@ -1145,9 +1145,9 @@ def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, ids=["initializers1", "NO initializers1"]) @pytest.mark.parametrize('has_initializers1', [True, False], ids=["initializers2", "NO initializers2"]) - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_reset_stateful_function_when_has_initializers_composition(self, comp_mode, cond0, cond1, expected, has_initializers1, has_initializers2): I1 = pnl.RecurrentTransferMechanism(integrator_mode=True, @@ -1179,12 +1179,12 @@ def test_reset_stateful_function_when_has_initializers_composition(self, comp_mo @pytest.mark.composition @pytest.mark.integrator_mechanism @pytest.mark.parametrize('until_finished, expected', [ - (True, [[[[0.96875]]], [[[0.9990234375]]]]), # The 5th and the 10th iteration - (False, [[[[0.5]]], [[[0.75]]]]), # The first and the second iteration + (True, [[[0.96875]], [[0.9990234375]]]), # The 5th and the 10th iteration + (False, [[[0.5]], [[0.75]]]), # The first and the second iteration ], ids=['until_finished', 'oneshot']) - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_max_executions_before_finished(self, comp_mode, until_finished, expected): I1 = pnl.RecurrentTransferMechanism(integrator_mode=True, integration_rate=0.5, @@ -1197,10 +1197,9 @@ def test_max_executions_before_finished(self, comp_mode, until_finished, expecte results = C.run(inputs={I1: [[1.0]]}, num_trials=1, execution_mode=comp_mode) if comp_mode is pnl.ExecutionMode.Python: assert I1.parameters.is_finished_flag.get(C) is until_finished + results2 = C.run(inputs={I1: [[1.0]]}, num_trials=1, execution_mode=comp_mode) - if comp_mode is not pnl.ExecutionMode.LLVM: - results = [results] - results2 = [results2] + np.testing.assert_allclose(expected[0], results) np.testing.assert_allclose(expected[1], results2) diff --git a/tests/mechanisms/test_transfer_mechanism.py b/tests/mechanisms/test_transfer_mechanism.py index 6e63307602..a3c0d19814 100644 --- a/tests/mechanisms/test_transfer_mechanism.py +++ b/tests/mechanisms/test_transfer_mechanism.py @@ -1650,9 +1650,9 @@ def test_reset_spec(self): @pytest.mark.transfer_mechanism @pytest.mark.benchmark(group="TransferMechanism") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. 
- @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_termination_measures(self, comp_mode): stim_input = ProcessingMechanism(input_shapes=2, name='Stim Input') stim_percept = TransferMechanism(name='Stimulus', input_shapes=2, function=Logistic) diff --git a/tests/models/test_greedy_agent.py b/tests/models/test_greedy_agent.py index 5a94786a41..ae9de598ec 100644 --- a/tests/models/test_greedy_agent.py +++ b/tests/models/test_greedy_agent.py @@ -101,10 +101,9 @@ def test_simplified_greedy_agent_random(benchmark, comp_mode): @pytest.mark.parametrize('prng', ['Default', 'Philox']) @pytest.mark.parametrize('fp_type', [pnl.core.llvm.ir.DoubleType, pnl.core.llvm.ir.FloatType]) def test_predator_prey(benchmark, mode, ocm_mode, prng, samples, fp_type): - if len(samples) > 10 and mode not in {pnl.ExecutionMode.LLVM, - pnl.ExecutionMode.LLVMExec, - pnl.ExecutionMode.LLVMRun} and \ - ocm_mode not in {'LLVM', 'PTX'}: + + # Skip large test instances that are not CPU compiled, or executed in parallel. + if len(samples) > 10 and not mode.is_compiled() and ocm_mode not in {'LLVM', 'PTX'}: pytest.skip("This test takes too long") # Instantiate LLVMBuilderContext using the preferred fp type @@ -215,7 +214,7 @@ def action_fn(variable): # np.testing.assert_allclose(run_results, [[0.9705216285127504, -0.1343332460369043]]) np.testing.assert_allclose(run_results, [[0.9705216285127504, -0.1343332460369043]], atol=1e-6, rtol=1e-6) elif prng == 'Philox': - if mode == pnl.ExecutionMode.Python or pytest.helpers.llvm_current_fp_precision() == 'fp64': + if mode is pnl.ExecutionMode.Python or pytest.helpers.llvm_current_fp_precision() == 'fp64': # np.testing.assert_allclose(run_results[0], [[-0.16882940384606543, -0.07280074899749223]]) np.testing.assert_allclose(run_results, [[-0.16882940384606543, -0.07280074899749223]]) elif pytest.helpers.llvm_current_fp_precision() == 'fp32': @@ -226,7 +225,7 @@ def action_fn(variable): else: assert False, "Unknown PRNG!" 
- if mode == pnl.ExecutionMode.Python and not benchmark.enabled: + if mode is pnl.ExecutionMode.Python and not benchmark.enabled: # FIXME: The results are 'close' for both Philox and MT, # because they're dominated by costs # FIX: Requires 1e-5 tolerance diff --git a/tests/ports/test_output_ports.py b/tests/ports/test_output_ports.py index 397d77b23c..d70b0ea702 100644 --- a/tests/ports/test_output_ports.py +++ b/tests/ports/test_output_ports.py @@ -44,10 +44,9 @@ def test_output_port_variable_spec(self, mech_mode): (("num_executions", pnl.TimeScale.PASS), [1], [1]), (("num_executions", pnl.TimeScale.TIME_STEP), [1], [1]), ], ids=lambda x: str(x) if len(x) != 1 else '') - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def tests_output_port_variable_spec_composition(self, comp_mode, spec, expected1, expected2): - if (len(spec) == 2) and (spec[1] == pnl.TimeScale.RUN) and \ - ((comp_mode & pnl.ExecutionMode._Exec) == pnl.ExecutionMode._Exec): + if (len(spec) == 2) and (spec[1] == pnl.TimeScale.RUN) and (comp_mode & pnl.ExecutionMode._Exec): pytest.skip("{} is not supported in {}".format(spec[1], comp_mode)) # Test specification of OutputPort's variable diff --git a/tests/scheduling/test_condition.py b/tests/scheduling/test_condition.py index a823f534f2..f1b9d73f3e 100644 --- a/tests/scheduling/test_condition.py +++ b/tests/scheduling/test_condition.py @@ -728,7 +728,7 @@ def test_AllHaveRun_2(self): ] ) @pytest.mark.parametrize('threshold', [10, 10.0]) - @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_Threshold_parameters( self, parameter, indices, default_variable, integration_rate, expected_results, threshold, comp_mode ): @@ -761,12 +761,10 @@ def test_Threshold_parameters( ('!=', -1, 0, [[[-1]]]), ] ) + @pytest.mark.usefixtures("comp_mode_no_per_node") # Per-node mode doesn't support Parameter access in conditions def test_Threshold_comparators( self, comparator, increment, threshold, expected_results, comp_mode ): - if comp_mode is pnl.ExecutionMode.LLVM: - pytest.skip('ExecutionMode.LLVM does not support Parameter access in conditions') - A = TransferMechanism( integrator_mode=True, integrator_function=pnl.AccumulatorIntegrator(rate=1, increment=increment), @@ -796,11 +794,10 @@ def test_Threshold_comparators( ('!=', -1, -1, 0, 1, [[[-3]]]), ] ) + @pytest.mark.usefixtures("comp_mode_no_per_node") # Per-node mode doesn't support Parameter access in conditions def test_Threshold_tolerances( self, comparator, increment, threshold, atol, rtol, expected_results, comp_mode ): - if comp_mode is pnl.ExecutionMode.LLVM: - pytest.skip('ExecutionMode.LLVM does not support Parameter access in conditions') A = TransferMechanism( integrator_mode=True, diff --git a/tests/scheduling/test_scheduler.py b/tests/scheduling/test_scheduler.py index 07106d6e5e..f7ca28a7f1 100644 --- a/tests/scheduling/test_scheduler.py +++ b/tests/scheduling/test_scheduler.py @@ -1544,9 +1544,9 @@ def test_inline_control_mechanism_example(self): (TimeScale.TRIAL, [[1.5], [0.4375]]), (TimeScale.RUN, [[1.5], [0.4375]])], ids=lambda x: x if isinstance(x, TimeScale) else "") - # 'LLVM' mode is not supported, because synchronization of compiler and + # '_LLVMPerNode' mode is not supported, because synchronization of compiler and # python values during execution is not implemented. 
- @pytest.mark.usefixtures("comp_mode_no_llvm") + @pytest.mark.usefixtures("comp_mode_no_per_node") def test_time_termination_measures(self, comp_mode, timescale, expected): in_one_pass = timescale in {TimeScale.TIME_STEP, TimeScale.PASS} attention = pnl.TransferMechanism(name='Attention',