ibr-cm · hagau · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024
diff --git a/docs/source/utility.rst b/docs/source/utility.rst
@@ -6,10 +6,15 @@ Utility
     :undoc-members:
     :show-inheritance:
 
-.. .. automodule:: utility.singleton
-..     :members:
-..     :undoc-members:
-..     :show-inheritance:
+.. automodule:: utility.code
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. automodule:: utility.filesystem
+    :members:
+    :undoc-members:
+    :show-inheritance:
 
 .. .. automodule:: utility.registry
 ..     :members:

diff --git a/plots.py b/plots.py
@@ -35,6 +35,7 @@
 from data_io import DataSet, read_from_file
 from extractors import BaseExtractor, DataAttributes
 
+from utility.code import ExtraCodeFunctionMixin
 from utility.filesystem import check_file_access_permissions
 
 # Import for availability in user-supplied code.
@@ -103,7 +104,7 @@ def prepare(self):
         return [(convert_columns_result, DataAttributes())]
 
 
-class PlottingTask(YAMLObject):
+class PlottingTask(YAMLObject, ExtraCodeFunctionMixin):
     r"""
     Generate a plot from the given data.
 
@@ -534,16 +535,9 @@ def set_theme(self, context:str = 'paper', axes_style:str = 'dark'):
             sb.set_theme(context=self.context, style=self.axes_style)
 
     def eval_grid_transform(self):
-        # Create a copy of the global environment for evaluating the extra
-        # code fragment so as to not pollute the global namespace itself.
-        global_env = globals().copy()
-
-        # Compile the code fragment
-        compiled_grid_transform = compile(self.grid_transform, filename='<string>', mode='exec')
-        # Actually evaluate the code within the given namespace to allow
-        # access to all the defined symbols, such as helper functions that are not defined inline.
-        eval(compiled_grid_transform, global_env)
-        grid_transform = eval('grid_transform', global_env)
+        # Compile and evaluate the code fragment in a shallow copy of the global environment.
+        grid_transform, global_environment = self.evaluate_function(function='grid_transform'
+                                                                , extra_code=self.grid_transform)
 
         return grid_transform
 

diff --git a/run_recipe.py b/run_recipe.py
@@ -50,21 +50,66 @@
 
 import tag_regular_expressions as tag_regex
 
+# import debug helper for usage in function definitions
+from common.debug import start_debug # noqa: F401
+
+from utility.code import compile_and_evaluate_function_definition
+
 _debug = False
 
 
-def eval_recipe_tag_definitions(recipe, attributes_regex_map, iterationvars_regex_map, parameters_regex_map):
+def evaluate_function_definitions(recipe:Recipe):
+    r"""
+    Compile all the functions defined in `recipe.function_definitions` and replace the source code stored there
+    with the resulting function objects.
+    The code is evaluated in a copy of the global environment of the interpreter so as to not pollute the global
+    namespace itself. All the defined functions share the same copy.
+
+    Parameters
+    ----------
+    recipe: Recipe
+        The recipe whose `function_definitions` are compiled; the mapping is updated in place.
+
+    Returns
+    -------
+        The shallow copy of the global environment that the functions were evaluated in, i.e. a dictionary that
+        additionally contains the names bound by the compiled code.
+    """
+    # NOTE: The copy of the global environment is shallow and thus only a rather simple method to prevent
+    # accidental overwrites of the global namespace, it is *not* a defense against deliberately malicious
+    # modifications.
+    environment = globals().copy()
+
+    definitions = recipe.function_definitions
+    for name in definitions:
+        # Fetch the string with the source code and evaluate it within the shared namespace, this allows access
+        # to all the symbols defined so far, such as helper functions that are not defined inline.
+        source = definitions[name]
+        compiled_function, environment = compile_and_evaluate_function_definition(source, name, environment)
+        # Bind the function object to the specified name, replacing the source code.
+        definitions[name] = compiled_function
+
+    # Hand back the environment with the compiled functions so that later evaluations can resolve the names
+    # defined in the recipe.
+    return environment
+
+
+def eval_recipe_tag_definitions(recipe:Recipe
+                                , attributes_regex_map, iterationvars_regex_map, parameters_regex_map
+                                , function_definitions_global_env:dict
+                               ):
     def eval_and_add_tags(tag_set_name, regex_map):
         for tag_name in recipe.evaluation.tags[tag_set_name]:
-            tag_list = eval(recipe.evaluation.tags[tag_set_name][tag_name]) # pylint: disable=eval-used
-            logd(f'{tag_name=} {tag_list=}')
-            evaluated_tag_list = []
+            # The `eval` is necessary here since the `transform` function of the tag
+            # can be an arbitrary function and has to be parsed into a `Callable`.
+            tag_list = eval(recipe.evaluation.tags[tag_set_name][tag_name], function_definitions_global_env) # pylint: disable=W0123:eval-used
+
+            # Check that the transform is indeed a `Callable`.
             for tag in tag_list:
                 if not isinstance(tag['transform'], Callable):
-                    tag['transform'] = eval(tag['transform']) # pylint: disable=eval-used
-                evaluated_tag_list.append(tag)
+                    raise RuntimeError(f'transform for {tag=} is not a Callable!')
 
-            regex_map[tag_name] = evaluated_tag_list
+            regex_map[tag_name] = tag_list
 
     if 'attributes' in recipe.evaluation.tags:
         eval_and_add_tags('attributes', attributes_regex_map)
@@ -76,12 +121,13 @@ def eval_and_add_tags(tag_set_name, regex_map):
     return attributes_regex_map, iterationvars_regex_map, parameters_regex_map
 
 
-def prepare_evaluation_phase(recipe:Recipe, options, data_repo):
+def prepare_evaluation_phase(recipe:Recipe, options, data_repo, function_definitions_global_env:dict):
     logi(f'prepare_evaluation_phase: {recipe}  {recipe.name}')
 
     if hasattr(recipe.evaluation, 'tags'):
         attributes_regex_map, iterationvars_regex_map, parameters_regex_map = eval_recipe_tag_definitions(recipe \
-                , tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map)
+                , tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map
+                , function_definitions_global_env)
     else:
         attributes_regex_map, iterationvars_regex_map, parameters_regex_map = \
                 tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map
@@ -245,11 +291,19 @@ def process_recipe(options):
     data_repo = {}
     job_list = []
 
+    # Compile all the functions defined in `function_definitions` and make them available in a shallow copy
+    # of the runtime environment.
+    if hasattr(recipe, 'function_definitions'):
+        function_definitions_global_env = evaluate_function_definitions(recipe)
+    else:
+        # Or just use the default environment.
+        function_definitions_global_env = globals()
+
     if not options.plot_only:
         if not hasattr(recipe, 'evaluation'):
             logi('process_recipe: no Evaluation in recipe')
             return
-        data_repo, jobs = prepare_evaluation_phase(recipe, options, data_repo)
+        data_repo, jobs = prepare_evaluation_phase(recipe, options, data_repo, function_definitions_global_env)
         job_list.extend(jobs)
 
     if options.eval_only:

diff --git a/transforms.py b/transforms.py
@@ -20,6 +20,8 @@
 # Import for availability in user-supplied code.
 from common.debug import start_ipython_dbg_cmdline, start_debug  # noqa: F401
 
+from utility.code import ExtraCodeFunctionMixin
+
 
 class Transform(YAMLObject):
     r"""
@@ -84,46 +86,6 @@ def prepare(self):
         return job_list
 
 
-class ExtraCodeFunctionMixin:
-    r"""
-    A mixin class for providing the functionality to compile and evaluate a
-    function and an additional, optional code fragment within a separate global environment.
-    """
-    def eval_function(self, function:Union[Callable, str], extra_code:Optional[str]) -> Callable:
-        r"""
-        Compile and evaluate the given function and an additional, optional
-        code fragment within a separate global environment and return the
-        executable function object.
-
-        Parameters
-        ----------
-        function : Union[Callable, str]
-            The name of the function or a function object.
-
-        extra_code : Optional[str]
-            This can contain additional code for the transform function, such as
-            the definition of a function over multiple lines or split into multiple
-            functions for readibility.
-        """
-        # create a copy of the global environment for evaluating the extra
-        # code fragment so as to not pollute the global namespace itself
-        global_env = globals().copy()
-
-        if isinstance(extra_code, str):
-            # compile the code fragment
-            compiled_extra_code = compile(extra_code, filename='<string>', mode='exec')
-            # actually evaluate the code within the given namespace to allow
-            # access to all the defined symbols, such as helper functions that are not defined inline
-            eval(compiled_extra_code, global_env) # pylint: disable=W0123:eval-used
-
-        if isinstance(function, Callable):
-            evaluated_function = function
-        else:
-            evaluated_function = eval(function, global_env) # pylint: disable=W0123:eval-used
-
-        return evaluated_function
-
-
 class ConcatTransform(Transform, YAMLObject):
     r"""
     A transform for concatenating all DataFrames from the given datasets.
@@ -364,7 +326,7 @@ def process(self, data, attributes) -> pd.DataFrame:
         # extra_code in a separate global namespace.
         # The compilation of the extra code has to happen in the thread/process
         # of the processing worker since code objects can't be serialized.
-        function = self.eval_function(self.function, self.extra_code)
+        function, _ = self.evaluate_function(self.function, self.extra_code)
 
         result = function(data)
 
@@ -445,7 +407,7 @@ def process(self, data, attributes):
             logw(f'ColumnFunctionTransform return is empty!')
             return pd.DataFrame()
 
-        function = self.eval_function(self.function, None)
+        function, _ = self.evaluate_function(self.function, None)
 
         data[self.output_column] = data[self.input_column].apply(function)
 
@@ -552,7 +514,7 @@ def aggregate_frame(self, data):
         # extra_code in a separate global namespace.
         # The compilation of the extra code has to happen in the thread/process
         # of the processing worker since code objects can't be serialized.
-        aggregation_function = self.eval_function(self.aggregation_function, self.extra_code)
+        aggregation_function, _ = self.evaluate_function(self.aggregation_function, self.extra_code)
 
         if len(self.grouping_columns) == 1:
             grouping_columns = self.grouping_columns[0]
@@ -698,7 +660,7 @@ def aggregate_frame(self, data):
         # extra_code in a separate global namespace.
         # The compilation of the extra code has to happen in the thread/process
         # of the processing worker since code objects can't be serialized.
-        transform_function = self.eval_function(self.transform_function, self.extra_code)
+        transform_function, _ = self.evaluate_function(self.transform_function, self.extra_code)
 
         if len(self.grouping_columns) == 1:
             grouping_columns = self.grouping_columns[0]

diff --git a/utility/code.py b/utility/code.py
@@ -0,0 +1,85 @@
+from typing import Any, Callable, Optional, Union
+
+def compile_and_evaluate_function_definition(source:str, function_name:str, global_environment:dict[str, Any]) -> tuple[Callable, dict[str, Any]]:
+    r"""
+    Execute the statements in `source` and return the object that `function_name` evaluates to afterwards.
+
+    Parameters
+    ----------
+    source : str
+        The source code of the code snippet to compile and execute.
+    function_name : str
+        The name of the function to return; it is evaluated as an expression in `global_environment`.
+    global_environment : dict[str, Any]
+        The global runtime environment to execute the code in; it is modified in place by the executed code.
+
+    Returns
+    -------
+    Callable
+        The resulting function object.
+    dict[str, Any]
+        The runtime environment of the function, this is the very same object as the given `global_environment`.
+
+    Raises
+    ------
+    SyntaxError
+        If the supplied source code has a syntax error.
+    ValueError
+        If the supplied source code contains null bytes.
+    NameError
+        If `function_name` refers to a name that is not defined in the environment after executing `source`.
+    """
+    # Compile the sequence of code statements in the given code fragment.
+    compiled_function_code = compile(source, filename='<string>', mode='exec')
+    # Execute the code within the given namespace so that every symbol it defines (e.g. helpers) is kept there.
+    exec(compiled_function_code, global_environment) # pylint: disable=W0122:exec-used
+
+    # Get the function object of the desired function.
+    function = eval(function_name, global_environment) # pylint: disable=W0123:eval-used
+
+    return function, global_environment
+
+
+class ExtraCodeFunctionMixin:
+    r"""
+    A mixin class for providing the functionality to compile and evaluate a
+    function and an additional, optional code fragment within a separate global environment.
+    """
+    def evaluate_function(self, function:Union[Callable, str], extra_code:Optional[str]) -> tuple[Callable, dict[str, Any]]:
+        r"""
+        Compile and evaluate the given function and an additional, optional code fragment within a separate global
+        environment and return the executable function object together with that environment.
+
+        Parameters
+        ----------
+        function : Union[Callable, str]
+            A function object, which is returned as is, or a string that is evaluated as a python expression.
+        extra_code : Optional[str]
+            Additional code that is executed before a `function` given as string is evaluated, e.g. the definition
+            of a function with multiple statements or split into multiple functions for readability.
+
+        Returns
+        -------
+        Callable
+            The resulting function object.
+        dict[str, Any]
+            The runtime environment of the function.
+
+        Raises
+        ------
+        NotImplementedError
+            If `function` is neither a string nor a callable.
+        """
+        import sys
+        # Evaluate in a shallow copy of this module's globals, extended with the globals of the module defining the
+        # class of `self`, so that the names imported there for user-supplied code are visible to the evaluated code.
+        global_env = globals().copy()
+        global_env.update(getattr(sys.modules.get(type(self).__module__), '__dict__', {}))
+
+        if isinstance(function, str):
+            if isinstance(extra_code, str):
+                return compile_and_evaluate_function_definition(extra_code, function, global_env)
+            return eval(function, global_env), global_env # pylint: disable=W0123:eval-used
+        if callable(function):
+            return function, global_env
+        raise NotImplementedError(f'Cannot compile: {function=}  {extra_code=}')