Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function definitions section to the recipe #18

Merged
merged 5 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions docs/source/utility.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@ Utility
:undoc-members:
:show-inheritance:

.. .. automodule:: utility.singleton
.. :members:
.. :undoc-members:
.. :show-inheritance:
.. automodule:: utility.code
:members:
:undoc-members:
:show-inheritance:

.. automodule:: utility.filesystem
:members:
:undoc-members:
:show-inheritance:

.. .. automodule:: utility.registry
.. :members:
Expand Down
16 changes: 5 additions & 11 deletions plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from data_io import DataSet, read_from_file
from extractors import BaseExtractor, DataAttributes

from utility.code import ExtraCodeFunctionMixin
from utility.filesystem import check_file_access_permissions

# Import for availability in user-supplied code.
Expand Down Expand Up @@ -103,7 +104,7 @@ def prepare(self):
return [(convert_columns_result, DataAttributes())]


class PlottingTask(YAMLObject):
class PlottingTask(YAMLObject, ExtraCodeFunctionMixin):
r"""
Generate a plot from the given data.

Expand Down Expand Up @@ -534,16 +535,9 @@ def set_theme(self, context:str = 'paper', axes_style:str = 'dark'):
sb.set_theme(context=self.context, style=self.axes_style)

def eval_grid_transform(self):
# Create a copy of the global environment for evaluating the extra
# code fragment so as to not pollute the global namespace itself.
global_env = globals().copy()

# Compile the code fragment
compiled_grid_transform = compile(self.grid_transform, filename='<string>', mode='exec')
# Actually evaluate the code within the given namespace to allow
# access to all the defined symbols, such as helper functions that are not defined inline.
eval(compiled_grid_transform, global_env)
grid_transform = eval('grid_transform', global_env)
# Compile and evaluate the code fragment in a shallow copy of the global environment.
grid_transform, global_environment = self.evaluate_function(function='grid_transform'
, extra_code=self.grid_transform)

return grid_transform

Expand Down
74 changes: 64 additions & 10 deletions run_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,66 @@

import tag_regular_expressions as tag_regex

# import debug helper for usage in function definitions
from common.debug import start_debug # noqa: F401recipe import Recipe

from utility.code import compile_and_evaluate_function_definition

_debug = False


def eval_recipe_tag_definitions(recipe, attributes_regex_map, iterationvars_regex_map, parameters_regex_map):
def evaluate_function_definitions(recipe:Recipe):
    r"""
    Compile every entry of `recipe.function_definitions` within one shared copy of the global environment.

    Each source code string stored in `recipe.function_definitions` is replaced, in place, by the function object
    obtained from evaluating it. All definitions are evaluated in the same environment, one after the other.

    Parameters
    ----------
    recipe: Recipe
        The recipe whose `function_definitions` mapping (function name to source code) is to be compiled.

    Returns
    -------
    The environment in which the definitions have been evaluated. It starts out as a shallow copy of the global
    environment of this module and is then passed through `compile_and_evaluate_function_definition` for every
    definition.
    """
    # Work on a copy of the module globals so that the evaluated code fragments do not pollute the global namespace
    # itself.
    #
    # NOTE: The copy is shallow. It is a simple guard against accidental overwrites, *not* a defense against
    #       deliberately malicious modifications.
    shared_environment = globals().copy()

    definitions = recipe.function_definitions
    for name in definitions:
        # Evaluate the source code in the shared environment so that the code has access to all the symbols defined
        # there, such as helper functions that are not defined inline.
        compiled_function, shared_environment = compile_and_evaluate_function_definition(definitions[name]
                                                                                       , name
                                                                                       , shared_environment)
        # Replace the source code string with the function object under the same name.
        definitions[name] = compiled_function

    # Hand the environment used for the evaluation back to the caller.
    return shared_environment


def eval_recipe_tag_definitions(recipe:Recipe
, attributes_regex_map, iterationvars_regex_map, parameters_regex_map
, function_definitions_global_env:dict
):
def eval_and_add_tags(tag_set_name, regex_map):
for tag_name in recipe.evaluation.tags[tag_set_name]:
tag_list = eval(recipe.evaluation.tags[tag_set_name][tag_name]) # pylint: disable=eval-used
logd(f'{tag_name=} {tag_list=}')
evaluated_tag_list = []
# The `eval` is necessary here since the `transform` function of the tag
# can be an arbitrary function and has to be parsed into a `Callable`.
tag_list = eval(recipe.evaluation.tags[tag_set_name][tag_name], function_definitions_global_env) # pylint: disable=W0123:eval-used

# Check that the transform is indeed a `Callable`.
for tag in tag_list:
if not isinstance(tag['transform'], Callable):
tag['transform'] = eval(tag['transform']) # pylint: disable=eval-used
evaluated_tag_list.append(tag)
raise RuntimeError(f'transform for {tag=} is not a Callable!')

regex_map[tag_name] = evaluated_tag_list
regex_map[tag_name] = tag_list

if 'attributes' in recipe.evaluation.tags:
eval_and_add_tags('attributes', attributes_regex_map)
Expand All @@ -76,12 +121,13 @@ def eval_and_add_tags(tag_set_name, regex_map):
return attributes_regex_map, iterationvars_regex_map, parameters_regex_map


def prepare_evaluation_phase(recipe:Recipe, options, data_repo):
def prepare_evaluation_phase(recipe:Recipe, options, data_repo, function_definitions_global_env:dict):
logi(f'prepare_evaluation_phase: {recipe} {recipe.name}')

if hasattr(recipe.evaluation, 'tags'):
attributes_regex_map, iterationvars_regex_map, parameters_regex_map = eval_recipe_tag_definitions(recipe \
, tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map)
, tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map
, function_definitions_global_env)
else:
attributes_regex_map, iterationvars_regex_map, parameters_regex_map = \
tag_regex.attributes_regex_map, tag_regex.iterationvars_regex_map, tag_regex.parameters_regex_map
Expand Down Expand Up @@ -245,11 +291,19 @@ def process_recipe(options):
data_repo = {}
job_list = []

# Compile all the functions defined in `function_definitions` and make them available in a shallow copy
# of the runtime environment.
if hasattr(recipe, 'function_definitions'):
function_definitions_global_env = evaluate_function_definitions(recipe)
else:
# Or just use the default environment.
function_definitions_global_env = globals()

if not options.plot_only:
if not hasattr(recipe, 'evaluation'):
logi('process_recipe: no Evaluation in recipe')
return
data_repo, jobs = prepare_evaluation_phase(recipe, options, data_repo)
data_repo, jobs = prepare_evaluation_phase(recipe, options, data_repo, function_definitions_global_env)
job_list.extend(jobs)

if options.eval_only:
Expand Down
50 changes: 6 additions & 44 deletions transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
# Import for availability in user-supplied code.
from common.debug import start_ipython_dbg_cmdline, start_debug # noqa: F401

from utility.code import ExtraCodeFunctionMixin


class Transform(YAMLObject):
r"""
Expand Down Expand Up @@ -84,46 +86,6 @@ def prepare(self):
return job_list


class ExtraCodeFunctionMixin:
    r"""
    A mixin class for providing the functionality to compile and evaluate a
    function and an additional, optional code fragment within a separate global environment.
    """
    def eval_function(self, function:Union[Callable, str], extra_code:Optional[str]) -> Callable:
        r"""
        Compile and evaluate the given function and an additional, optional
        code fragment within a separate global environment and return the
        executable function object.

        Parameters
        ----------
        function : Union[Callable, str]
            The name of the function or a function object. A string is evaluated as
            an expression after `extra_code` has been executed.

        extra_code : Optional[str]
            This can contain additional code for the transform function, such as
            the definition of a function over multiple lines or split into multiple
            functions for readability. Anything that is not a string is ignored.

        Returns
        -------
        Callable
            `function` itself if it is already callable, otherwise the result of
            evaluating `function` in the separate environment.

        Raises
        ------
        SyntaxError
            If `extra_code` or `function` is not valid Python source code.
        NameError
            If `function` refers to a name that is not defined in the environment.
        """
        # Create a copy of the global environment for evaluating the extra
        # code fragment so as to not pollute the global namespace itself.
        global_env = globals().copy()

        # SECURITY: `extra_code` and `function` are executed verbatim, they must
        # only ever come from a trusted source.
        if isinstance(extra_code, str):
            # Compile the code fragment ...
            compiled_extra_code = compile(extra_code, filename='<string>', mode='exec')
            # ... and execute it within the copied namespace to allow access to all the defined
            # symbols, such as helper functions that are not defined inline.
            exec(compiled_extra_code, global_env)  # pylint: disable=exec-used

        # A function object does not need any further evaluation.
        if callable(function):
            return function

        # Otherwise resolve the expression in the environment the extra code has been executed in.
        return eval(function, global_env)  # pylint: disable=W0123:eval-used


class ConcatTransform(Transform, YAMLObject):
r"""
A transform for concatenating all DataFrames from the given datasets.
Expand Down Expand Up @@ -364,7 +326,7 @@ def process(self, data, attributes) -> pd.DataFrame:
# extra_code in a separate global namespace.
# The compilation of the extra code has to happen in the thread/process
# of the processing worker since code objects can't be serialized.
function = self.eval_function(self.function, self.extra_code)
function, _ = self.evaluate_function(self.function, self.extra_code)

result = function(data)

Expand Down Expand Up @@ -445,7 +407,7 @@ def process(self, data, attributes):
logw(f'ColumnFunctionTransform return is empty!')
return pd.DataFrame()

function = self.eval_function(self.function, None)
function, _ = self.evaluate_function(self.function, None)

data[self.output_column] = data[self.input_column].apply(function)

Expand Down Expand Up @@ -552,7 +514,7 @@ def aggregate_frame(self, data):
# extra_code in a separate global namespace.
# The compilation of the extra code has to happen in the thread/process
# of the processing worker since code objects can't be serialized.
aggregation_function = self.eval_function(self.aggregation_function, self.extra_code)
aggregation_function, _ = self.evaluate_function(self.aggregation_function, self.extra_code)

if len(self.grouping_columns) == 1:
grouping_columns = self.grouping_columns[0]
Expand Down Expand Up @@ -698,7 +660,7 @@ def aggregate_frame(self, data):
# extra_code in a separate global namespace.
# The compilation of the extra code has to happen in the thread/process
# of the processing worker since code objects can't be serialized.
transform_function = self.eval_function(self.transform_function, self.extra_code)
transform_function, _ = self.evaluate_function(self.transform_function, self.extra_code)

if len(self.grouping_columns) == 1:
grouping_columns = self.grouping_columns[0]
Expand Down
85 changes: 85 additions & 0 deletions utility/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from typing import Any, Callable, Optional, Union

def compile_and_evaluate_function_definition(source:str, function_name:str, global_environment:dict[str, Any]) -> tuple[Callable, dict[str, Any]]:
    r"""
    Execute the sequence of code statements in the given `source` and return the function object bound to the
    specified `function_name` name.

    Parameters
    ----------
    source : str
        The source code of the code snippet to compile and execute.
    function_name : str
        The name of the function to return. It is evaluated as an expression in `global_environment` after `source`
        has been executed.
    global_environment : dict[str, Any]
        The global runtime environment to execute the given code in. The dictionary is modified in place: every name
        bound by `source` is added to it.

    Returns
    -------
    Callable
        The resulting function object.
    dict[str, Any]
        The runtime environment of the function. This is the very same object that has been passed in as
        `global_environment`.

    Raises
    ------
    SyntaxError
        If the supplied source code has a syntax error.
    ValueError
        If the supplied source code contains null bytes.
    NameError
        If `function_name` is not defined in `global_environment` after executing `source`.
    """
    # SECURITY: Both `source` and `function_name` are executed verbatim, they must only ever come from a trusted
    #           source.

    # Compile the sequence of code statements in the given code fragment.
    compiled_function_code = compile(source, filename='<string>', mode='exec')
    # Actually execute the code within the given namespace to allow
    # access to all the defined symbols, such as helper functions that are not defined inline.
    exec(compiled_function_code, global_environment)  # pylint: disable=exec-used

    # Get the function object of the desired function.
    function = eval(function_name, global_environment)  # pylint: disable=W0123:eval-used

    return function, global_environment


class ExtraCodeFunctionMixin:
    r"""
    A mixin class for providing the functionality to compile and evaluate a
    function and an additional, optional code fragment within a separate global environment.
    """
    def evaluate_function(self, function:Union[Callable, str], extra_code:Optional[str]) -> tuple[Callable, dict[str, Any]]:
        r"""
        Compile and evaluate the given function and an additional, optional
        code fragment within a separate global environment and return the
        executable function object.

        Parameters
        ----------
        function : Union[Callable, str]
            The name of the function or a function object.

        extra_code : Optional[str]
            This can contain additional code for the function definition, such as the definition of a function with
            multiple statements or split into multiple functions for readability, i.e. every use case where a single
            simple python statement doesn't suffice.
            It is only evaluated if `function` is given as a string.

        Returns
        -------
        Callable
            The resulting function object.
        dict[str, Any]
            The runtime environment of the function.

        Raises
        ------
        NotImplementedError
            If `function` is neither a string nor a callable object.
        """
        # Create a copy of the global environment for evaluating the extra code fragment so as to not pollute the
        # global namespace itself.
        # NOTE(review): `globals()` refers to the namespace of the module this mixin is defined in, not to the one of
        #               the module defining the class that uses the mixin. Names that are only imported there are
        #               presumably not visible to the evaluated code -- TODO confirm that this is intended.
        global_env = globals().copy()

        if isinstance(function, str):
            if isinstance(extra_code, str):
                # Execute the extra code first, then look up the function in the resulting environment.
                return compile_and_evaluate_function_definition(extra_code, function, global_env)

            # Without extra code, the string has to be an expression that can be evaluated on its own.
            evaluated_function = eval(function, global_env)  # pylint: disable=W0123:eval-used
            return evaluated_function, global_env

        if callable(function):
            # A function object does not need any further evaluation.
            return function, global_env

        raise NotImplementedError(f'Cannot compile: {function=} {extra_code=}')