refactor(forms): rewrite structure / jexl evaluator

The new structure / JEXL evaluator works a bit differently: instead of trying to replace evaluation contexts during recursive evaluation (for example in is_hidden checks), we now have a local JEXL runtime for each field. Also, the JEXL expressions (or their results, rather) are heavily cached, which should speed things up significantly. Note that this is WIP: many tests are still failing, but many are already succeeding as well. We're trying to keep the test cases 100% unchanged - the only modifications so far are some improved assertion messages, which make debugging easier.
projectcaluma · Jan 24, 2025 · 54e63bd · 54e63bd
1 parent b8deeaa
commit 54e63bd
Show file tree

Hide file tree

Showing 8 changed files with 893 additions and 154 deletions.
diff --git a/caluma/caluma_core/jexl.py b/caluma/caluma_core/jexl.py
@@ -167,6 +167,11 @@ def _length_transform(self, value, *options):
             return None
 
     def evaluate(self, expression, context=None):
+        # log.info(
+        #    "JEXL: evaluating expression <<< %s >>> in context: %s",
+        #    str(expression),
+        #    str(dict(context)),
+        # )
         self._expr_stack.append(expression)
         try:
             return super().evaluate(expression, context)

diff --git a/caluma/caluma_form/domain_logic.py b/caluma/caluma_form/domain_logic.py
@@ -1,4 +1,5 @@
 from graphlib import TopologicalSorter
+from logging import getLogger
 from typing import Optional
 
 from django.db import transaction
@@ -12,6 +13,8 @@
 from caluma.caluma_user.models import BaseUser
 from caluma.utils import update_model
 
+log = getLogger(__name__)
+
 
 class BaseLogic:
     @staticmethod
@@ -156,13 +159,24 @@ def post_save(answer: models.Answer) -> models.Answer:
     def update_calc_dependents(answer):
         if not answer.question.calc_dependents:
             return
+        log.debug("update_calc_dependents(%s)", answer)
 
         root_doc = utils.prefetch_document(answer.document.family_id)
         struc = structure.FieldSet(root_doc, root_doc.form)
 
         for question in models.Question.objects.filter(
             pk__in=answer.question.calc_dependents
         ):
+            log.debug(
+                "update_calc_dependents(%s): updating question %s", answer, question.pk
+            )
+            # FIXME: update_or_create_calc_answer() does not properly
+            # deal with table rows: we start recalculating from the root doc,
+            # but if a recalculation was triggered inside a table row, we need to
+            # do *that* recalc properly as well (we search for dependents here, but
+            # if those dependents are in a row doc, we can't find all of them and
+            # don't properly update them either: get_field() returns None, because
+            # the affected question is not in the root form).
             update_or_create_calc_answer(question, root_doc, struc)
 
     @classmethod

diff --git a/caluma/caluma_form/jexl2.py b/caluma/caluma_form/jexl2.py
@@ -0,0 +1,138 @@
+from collections import ChainMap
+from contextlib import contextmanager
+from functools import partial
+
+from pyjexl.analysis import ValidatingAnalyzer
+
+from caluma.caluma_form.jexl import QuestionMissing
+from caluma.caluma_form.structure2 import BaseField
+
+from ..caluma_core.jexl import (
+    JEXL,
+    ExtractTransformArgumentAnalyzer,
+    ExtractTransformSubjectAnalyzer,
+    ExtractTransformSubjectAndArgumentsAnalyzer,
+)
+from .structure import Field
+
+"""
+Rewrite of the JEXL handling code.
+
+Design principles:
+
+* The JEXL classes do not deal with context switching between questions anymore
+* The QuestionJexl class only sets up the "runtime", any context is used from the
+  structure2 code
+* We only deal with the *evaluation*, no transform/extraction is happening here - that code
+  is mostly fine and doesn't need a rewrite
+* Caching is done by the structure2 code, not here
+* JEXL evaluation happens lazily, but the results are cached.
+"""
+
+
+class QuestionValidatingAnalyzer(ValidatingAnalyzer):
+    def visit_Transform(self, transform):
+        if transform.name == "answer" and not isinstance(transform.subject.value, str):
+            yield f"{transform.subject.value} is not a valid question slug."
+
+        yield from super().visit_Transform(transform)
+
+
+class QuestionJexl2(JEXL):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self.current_structure = []
+
+        self.add_transform("answer", self.answer_transform)
+
+    def get_structure(self):
+        return self.current_structure[-1]
+
+    @contextmanager
+    def use_structure(self, context):
+        self.current_structure.append(context)
+        try:
+            yield
+        finally:
+            self.current_structure.pop()
+
+    def answer_transform(self, question_slug, *args):
+        context = self.get_structure()
+        field = context.get_field(question_slug)
+
+        def _default_or_none(self):
+            if len(args):
+                return args[0]
+            return None
+
+        if not field:
+            raise QuestionMissing(
+                f"Question `{question_slug}` could not be found in form {context.get_form()}"
+            )
+            # TODO: should this be an exception? JEXL is referencing a
+            # non-existing *field* not just a missing answer to an existing
+            # field
+            return _default_or_none()
+
+        if field.is_hidden():
+            # Hidden fields *always* return the empty value, even if we have
+            # a default
+            return field.question.empty_value()
+        elif field.is_empty():
+            # not hidden, but empty
+            return _default_or_none()
+
+        return field.get_value()
+
+    def validate(self, expression, **kwargs):
+        return super().validate(expression, QuestionValidatingAnalyzer)
+
+    def extract_referenced_questions(self, expr):
+        transforms = ["answer"]
+        yield from self.analyze(
+            expr, partial(ExtractTransformSubjectAnalyzer, transforms=transforms)
+        )
+
+    def extract_referenced_questions_with_arguments(self, expr):
+        transforms = ["answer"]
+        yield from self.analyze(
+            expr,
+            partial(ExtractTransformSubjectAndArgumentsAnalyzer, transforms=transforms),
+        )
+
+    def extract_referenced_mapby_questions(self, expr):
+        transforms = ["mapby"]
+        yield from self.analyze(
+            expr, partial(ExtractTransformArgumentAnalyzer, transforms=transforms)
+        )
+
+    def _get_referenced_fields(self, field: Field, expr: str):
+        deps = list(self.extract_referenced_questions_with_arguments(expr))
+        referenced_fields = [self._structure.get_field(slug) for slug, _ in deps]
+
+        referenced_slugs = [ref.question.slug for ref in referenced_fields if ref]
+
+        for slug, args in deps:
+            required = len(args) == 0
+            if slug not in referenced_slugs and required:
+                raise QuestionMissing(
+                    f"Question `{slug}` could not be found in form {field.form}"
+                )
+
+        return [field for field in referenced_fields if field]
+
+    def evaluate(self, expr, context: BaseField, raise_on_error=True):
+        try:
+            with self.use_structure(context):
+                # Sadly, some expressions (such as the answer transform)
+                # need the current context but don't regularly have it available.
+                # Therefore we use this context manager so it can do it's job
+                # Also, combine the global context (self.context) with
+                return super().evaluate(
+                    expr, ChainMap(self.context, context.get_context())
+                )
+        except (TypeError, ValueError, ZeroDivisionError, AttributeError):
+            if raise_on_error:
+                raise
+            return None