diff --git a/docs/source/utility.rst b/docs/source/utility.rst index 6793d1e..f808864 100644 --- a/docs/source/utility.rst +++ b/docs/source/utility.rst @@ -6,10 +6,15 @@ Utility :undoc-members: :show-inheritance: -.. .. automodule:: utility.singleton -.. :members: -.. :undoc-members: -.. :show-inheritance: +.. automodule:: utility.code + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: utility.filesystem + :members: + :undoc-members: + :show-inheritance: .. .. automodule:: utility.registry .. :members: diff --git a/plots.py b/plots.py index f2ad7fc..398f101 100644 --- a/plots.py +++ b/plots.py @@ -35,6 +35,7 @@ from data_io import DataSet, read_from_file from extractors import BaseExtractor, DataAttributes +from utility.code import ExtraCodeFunctionMixin from utility.filesystem import check_file_access_permissions # Import for availability in user-supplied code. @@ -103,7 +104,7 @@ def prepare(self): return [(convert_columns_result, DataAttributes())] -class PlottingTask(YAMLObject): +class PlottingTask(YAMLObject, ExtraCodeFunctionMixin): r""" Generate a plot from the given data. @@ -534,16 +535,9 @@ def set_theme(self, context:str = 'paper', axes_style:str = 'dark'): sb.set_theme(context=self.context, style=self.axes_style) def eval_grid_transform(self): - # Create a copy of the global environment for evaluating the extra - # code fragment so as to not pollute the global namespace itself. - global_env = globals().copy() - - # Compile the code fragment - compiled_grid_transform = compile(self.grid_transform, filename='', mode='exec') - # Actually evaluate the code within the given namespace to allow - # access to all the defined symbols, such as helper functions that are not defined inline. - eval(compiled_grid_transform, global_env) - grid_transform = eval('grid_transform', global_env) + # Compile and evaluate the code fragment in a shallow copy of the global environment. 
+ grid_transform, global_environment = self.evaluate_function(function='grid_transform' + , extra_code=self.grid_transform) return grid_transform diff --git a/run_recipe.py b/run_recipe.py index ef31feb..a20f766 100644 --- a/run_recipe.py +++ b/run_recipe.py @@ -50,21 +50,66 @@ import tag_regular_expressions as tag_regex +# import debug helper for usage in function definitions +from common.debug import start_debug # noqa: F401 from recipe import Recipe + +from utility.code import compile_and_evaluate_function_definition + _debug = False -def eval_recipe_tag_definitions(recipe, attributes_regex_map, iterationvars_regex_map, parameters_regex_map): +def evaluate_function_definitions(recipe:Recipe): + r""" + Evaluate the functions defined in `recipe.function_definitions` and make the compiled code available in a copy of + the global environment of the interpreter. + A copy of the global environment is used so as to not pollute the global namespace itself. All the defined functions share the same copy. + + Parameters + ---------- + recipe: Recipe + The recipe in which the functions are to be made available. + + Returns + ------- + A shallow copy of the global environment with the compiled functions as members, i.e. a dictionary with the + function names as key and the code as associated value. + """ + # Create a copy of the global environment for evaluating the extra code fragments so as to not pollute the global + # namespace itself. + # + # NOTE: This is a shallow copy and thus a rather simple method to prevent accidental overwrites, *not* a defense + # against deliberately malicious modifications. + global_env = globals().copy() + + for function_name in recipe.function_definitions: + # Get the string with the source code. + function_code = recipe.function_definitions[function_name] + # Actually evaluate the code within the given namespace to allow + # access to all the defined symbols, such as helper functions that are not defined inline. 
+ function, global_env = compile_and_evaluate_function_definition(function_code, function_name, global_env) + # Bind the function to the specified name. + recipe.function_definitions[function_name] = function + + # Return the environment with the compiled functions. + return global_env + + +def eval_recipe_tag_definitions(recipe:Recipe + , attributes_regex_map, iterationvars_regex_map, parameters_regex_map + , function_definitions_global_env:dict + ): def eval_and_add_tags(tag_set_name, regex_map): for tag_name in recipe.evaluation.tags[tag_set_name]: - tag_list = eval(recipe.evaluation.tags[tag_set_name][tag_name]) # pylint: disable=eval-used - logd(f'{tag_name=} {tag_list=}') - evaluated_tag_list = [] + # The `eval` is necessary here since the `transform` function of the tag + # can be an arbitrary function and has to be parsed into a `Callable`. + tag_list = eval(recipe.evaluation.tags[tag_set_name][tag_name], function_definitions_global_env) # pylint: disable=W0123:eval-used + + # Check that the transform is indeed a `Callable`. 
for tag in tag_list: if not isinstance(tag['transform'], Callable): - tag['transform'] = eval(tag['transform']) # pylint: disable=eval-used - evaluated_tag_list.append(tag) + raise RuntimeError(f'transform for {tag=} is not a Callable!') - regex_map[tag_name] = evaluated_tag_list + regex_map[tag_name] = tag_list if 'attributes' in recipe.evaluation.tags: eval_and_add_tags('attributes', attributes_regex_map) @@ -76,12 +121,13 @@ def eval_and_add_tags(tag_set_name, regex_map): return attributes_regex_map, iterationvars_regex_map, parameters_regex_map -def prepare_evaluation_phase(recipe:Recipe, options, data_repo): +def prepare_evaluation_phase(recipe:Recipe, options, data_repo, function_definitions_global_env:dict): logi(f'prepare_evaluation_phase: {recipe} {recipe.name}') if hasattr(recipe.evaluation, 'tags'): attributes_regex_map, iterationvars_regex_map, parameters_regex_map = eval_recipe_tag_definitions(recipe \ - , tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map) + , tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map + , function_definitions_global_env) else: attributes_regex_map, iterationvars_regex_map, parameters_regex_map = \ tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map @@ -245,11 +291,19 @@ def process_recipe(options): data_repo = {} job_list = [] + # Compile all the functions defined in `function_definitions` and make them available in a shallow copy + # of the runtime environment. + if hasattr(recipe, 'function_definitions'): + function_definitions_global_env = evaluate_function_definitions(recipe) + else: + # Or just use the default environment. 
+ function_definitions_global_env = globals() + if not options.plot_only: if not hasattr(recipe, 'evaluation'): logi('process_recipe: no Evaluation in recipe') return - data_repo, jobs = prepare_evaluation_phase(recipe, options, data_repo) + data_repo, jobs = prepare_evaluation_phase(recipe, options, data_repo, function_definitions_global_env) job_list.extend(jobs) if options.eval_only: diff --git a/transforms.py b/transforms.py index 2452d62..bb03009 100644 --- a/transforms.py +++ b/transforms.py @@ -20,6 +20,8 @@ # Import for availability in user-supplied code. from common.debug import start_ipython_dbg_cmdline, start_debug # noqa: F401 +from utility.code import ExtraCodeFunctionMixin + class Transform(YAMLObject): r""" @@ -84,46 +86,6 @@ def prepare(self): return job_list -class ExtraCodeFunctionMixin: - r""" - A mixin class for providing the functionality to compile and evaluate a - function and an additional, optional code fragment within a separate global environment. - """ - def eval_function(self, function:Union[Callable, str], extra_code:Optional[str]) -> Callable: - r""" - Compile and evaluate the given function and an additional, optional - code fragment within a separate global environment and return the - executable function object. - - Parameters - ---------- - function : Union[Callable, str] - The name of the function or a function object. - - extra_code : Optional[str] - This can contain additional code for the transform function, such as - the definition of a function over multiple lines or split into multiple - functions for readibility. 
- """ - # create a copy of the global environment for evaluating the extra - # code fragment so as to not pollute the global namespace itself - global_env = globals().copy() - - if isinstance(extra_code, str): - # compile the code fragment - compiled_extra_code = compile(extra_code, filename='', mode='exec') - # actually evaluate the code within the given namespace to allow - # access to all the defined symbols, such as helper functions that are not defined inline - eval(compiled_extra_code, global_env) # pylint: disable=W0123:eval-used - - if isinstance(function, Callable): - evaluated_function = function - else: - evaluated_function = eval(function, global_env) # pylint: disable=W0123:eval-used - - return evaluated_function - - class ConcatTransform(Transform, YAMLObject): r""" A transform for concatenating all DataFrames from the given datasets. @@ -364,7 +326,7 @@ def process(self, data, attributes) -> pd.DataFrame: # extra_code in a separate global namespace. # The compilation of the extra code has to happen in the thread/process # of the processing worker since code objects can't be serialized. - function = self.eval_function(self.function, self.extra_code) + function, _ = self.evaluate_function(self.function, self.extra_code) result = function(data) @@ -445,7 +407,7 @@ def process(self, data, attributes): logw(f'ColumnFunctionTransform return is empty!') return pd.DataFrame() - function = self.eval_function(self.function, None) + function, _ = self.evaluate_function(self.function, None) data[self.output_column] = data[self.input_column].apply(function) @@ -552,7 +514,7 @@ def aggregate_frame(self, data): # extra_code in a separate global namespace. # The compilation of the extra code has to happen in the thread/process # of the processing worker since code objects can't be serialized. 
- aggregation_function = self.eval_function(self.aggregation_function, self.extra_code) + aggregation_function, _ = self.evaluate_function(self.aggregation_function, self.extra_code) if len(self.grouping_columns) == 1: grouping_columns = self.grouping_columns[0] @@ -698,7 +660,7 @@ def aggregate_frame(self, data): # extra_code in a separate global namespace. # The compilation of the extra code has to happen in the thread/process # of the processing worker since code objects can't be serialized. - transform_function = self.eval_function(self.transform_function, self.extra_code) + transform_function, _ = self.evaluate_function(self.transform_function, self.extra_code) if len(self.grouping_columns) == 1: grouping_columns = self.grouping_columns[0] diff --git a/utility/code.py b/utility/code.py new file mode 100644 index 0000000..4d34a3b --- /dev/null +++ b/utility/code.py @@ -0,0 +1,85 @@ +from typing import Any, Callable, Optional, Union + +def compile_and_evaluate_function_definition(source:str, function_name:str , global_environment:dict[str, Any]) -> tuple[Callable, dict[str, Any]]: + r""" + Evaluate the sequence of code statements in the given `source` and return the function object bound to the specified `function_name`. + + Parameters + ---------- + source : str + The source code of the code snippet to compile and evaluate. + function_name : str + The name of the function to return. + global_environment : dict[str, Any] + The global runtime environment to evaluate the given code in. + + Returns + ------- + Callable + The resulting function object. + dict + The runtime environment of the function. + + Raises + ------ + SyntaxError + If the supplied source code has a syntax error. + ValueError + If the supplied source code contains null bytes. + """ + + # Compile the sequence of code statements in the given code fragment. 
+ compiled_function_code = compile(source, filename='', mode='exec') + # Actually evaluate the code within the given namespace to allow + # access to all the defined symbols, such as helper functions that are not defined inline. + eval(compiled_function_code, global_environment) # pylint: disable=W0123:eval-used + + # Get the function object of the desired function. + function = eval(function_name, global_environment) # pylint: disable=W0123:eval-used + + return function, global_environment + + +class ExtraCodeFunctionMixin: + r""" + A mixin class for providing the functionality to compile and evaluate a + function and an additional, optional code fragment within a separate global environment. + """ + def evaluate_function(self, function:Union[Callable, str], extra_code:Optional[str]) -> tuple[Callable, dict[str, Any]]: + r""" + Compile and evaluate the given function and an additional, optional + code fragment within a separate global environment and return the + executable function object. + + Parameters + ---------- + function : Union[Callable, str] + The name of the function or a function object. + + extra_code : Optional[str] + This can contain additional code for the function definition, such as the definition of a function with + multiple statements or split into multiple functions for readability, i.e. every use case where a single + simple Python statement doesn't suffice. + + Returns + ------- + Callable + The resulting function object. + dict[str, Any] + The runtime environment of the function. + """ + # Create a copy of the global environment for evaluating the extra code fragment so as to not pollute the + # global namespace itself. NOTE(review): `globals()` here is the namespace of this module (`utility.code`), so names imported only by the module that defines the subclass are not visible to the evaluated code -- confirm this is intended. 
+ global_env = globals().copy() + + if isinstance(function, str) and isinstance(extra_code, str): + function_object, global_env = compile_and_evaluate_function_definition(extra_code, function, global_env) + return function_object, global_env + elif not isinstance(function, str) and isinstance(function, Callable): + evaluated_function = function + return evaluated_function, global_env + elif isinstance(function, str): + evaluated_function = eval(function, global_env) # pylint: disable=W0123:eval-used + return evaluated_function, global_env + else: + raise NotImplementedError(f'Cannot compile: {function=} {extra_code=}') \ No newline at end of file