Skip to content

Commit

Permalink
Merge pull request #5085 from open-formulieren/issue/5084-fix-perform…
Browse files Browse the repository at this point in the history
…ance-form-definition-update

🚑 Fix performance form definition update
  • Loading branch information
sergei-maertens authored Feb 6, 2025
2 parents 7c10f99 + 86b83cc commit 87c96f1
Show file tree
Hide file tree
Showing 3 changed files with 461 additions and 65 deletions.
16 changes: 0 additions & 16 deletions src/openforms/formio/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,22 +177,6 @@ def is_layout_component(component: Component) -> bool:
return False


def component_in_editgrid(configuration: dict, component: dict) -> bool:
# Get all the editgrid components in the configuration
editgrids = []
for comp in iter_components(configuration=configuration):
if comp["type"] == "editgrid":
editgrids.append(comp)

# Check if the component is in the editgrid
for editgrid in editgrids:
for comp in iter_components(configuration=editgrid):
if comp["key"] == component["key"]:
return True

return False


def get_component_datatype(component):
component_type = component["type"]
if component.get("multiple"):
Expand Down
182 changes: 135 additions & 47 deletions src/openforms/forms/models/form_variable.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from __future__ import annotations

from copy import deepcopy
from typing import TYPE_CHECKING, ClassVar
import itertools
from copy import copy, deepcopy
from typing import ClassVar

from django.db import models, transaction
from django.db.models import CheckConstraint, Q
from django.utils.translation import gettext_lazy as _

from glom import glom
import elasticapm

from openforms.formio.utils import (
component_in_editgrid,
get_component_datatype,
get_component_default_value,
is_layout_component,
Expand All @@ -26,29 +26,42 @@
)
from openforms.variables.utils import check_initial_value

from .form import Form
from .form_definition import FormDefinition

if TYPE_CHECKING:
from .form import Form


EMPTY_PREFILL_PLUGIN = Q(prefill_plugin="")
EMPTY_PREFILL_ATTRIBUTE = Q(prefill_attribute="")
EMPTY_PREFILL_OPTIONS = Q(prefill_options={})
USER_DEFINED = Q(source=FormVariableSources.user_defined)

# these are the attributes that are derived from the matching formio component,
# see FormVariableManager.synchronize_for. Other attributes are relational or
# related to user defined variables (like service fetch, prefill options...).
UPSERT_ATTRIBUTES_TO_COMPARE: tuple[str, ...] = (
"prefill_plugin",
"prefill_attribute",
"prefill_identifier_role",
"name",
"is_sensitive_data",
"data_type",
"initial_value",
)


class FormVariableManager(models.Manager["FormVariable"]):
use_in_migrations = True

@transaction.atomic
def create_for_form(self, form: "Form") -> None:
form_steps = form.formstep_set.select_related("form_definition")
def create_for_form(self, form: Form) -> None:
form_steps = form.formstep_set.select_related( # pyright: ignore[reportAttributeAccessIssue]
"form_definition"
)

for form_step in form_steps:
self.synchronize_for(form_step.form_definition)

@transaction.atomic
@elasticapm.capture_span(span_type="app.core.models")
@transaction.atomic(savepoint=False)
def synchronize_for(self, form_definition: FormDefinition):
"""
Synchronize the form variables for a given form definition.
Expand All @@ -64,32 +77,40 @@ def synchronize_for(self, form_definition: FormDefinition):
"""
# Build the desired state
desired_variables: list[FormVariable] = []
desired_keys: set[str] = set()
# XXX: looping over the configuration_wrapper is not (yet) viable because it
# also yields the components nested inside edit grids, which we need to ignore.
# So, we stick to iter_components. Performance wise this should be okay since we
# only need to do one pass.
# So, we stick to iter_components. We deliberately do a single pass to process
# the formio definition and then "copy" the information for each affected form
# so that we can avoid excessive component tree processing.
configuration = form_definition.configuration
for component in iter_components(configuration=configuration, recursive=True):
for component in iter_components(
configuration=configuration,
recursive=True,
# components inside edit grids are not real variables
recurse_into_editgrid=False,
):
# we need to ignore components that don't actually hold any values - there's
# no point to create variables for those.
if is_layout_component(component):
continue
if component["type"] in ("content", "softRequiredErrors"):
continue
if component_in_editgrid(configuration, component):
continue

# extract options from the component
prefill_plugin = glom(component, "prefill.plugin", default="") or ""
prefill_attribute = glom(component, "prefill.attribute", default="") or ""
prefill_identifier_role = glom(
component, "prefill.identifierRole", default=IdentifierRoles.main
prefill = component.get("prefill", {})
prefill_plugin = prefill.get("plugin") or ""
prefill_attribute = prefill.get("attribute") or ""
prefill_identifier_role = (
prefill.get("identifierRole") or IdentifierRoles.main
)

key = component["key"]
desired_keys.add(key)
desired_variables.append(
self.model(
form=None, # will be set later when visiting all affected forms
key=component["key"],
key=key,
form_definition=form_definition,
prefill_plugin=prefill_plugin,
prefill_attribute=prefill_attribute,
Expand All @@ -102,8 +123,6 @@ def synchronize_for(self, form_definition: FormDefinition):
)
)

desired_keys = [variable.key for variable in desired_variables]

# if the Formio configuration of the form definition itself is updated and
# components have been removed or their keys have changed, we know for certain
# we can discard those old form variables - it doesn't matter which form they
Expand All @@ -113,41 +132,81 @@ def synchronize_for(self, form_definition: FormDefinition):
)
stale_variables.delete()

# check which form (steps) are affected and patch them up. It is irrelevant whether
# Check which forms are affected and patch them up.
# It is irrelevant whether
# the form definition is re-usable or not, though semantically at most one form step
# should be found for single-use form definitions.
# fmt: off
affected_form_steps = (
form_definition
.formstep_set # pyright: ignore[reportAttributeAccessIssue]
.select_related("form")
affected_forms = (
Form.objects.filter(formstep__form_definition=form_definition)
.order_by("id")
.distinct("id")
.values_list("pk", flat=True)
.iterator()
)
# fmt: on

# Finally, collect all the instances and efficiently upsert them - creating missing
# Collect all the instances and efficiently upsert them - creating missing
# variables and updating existing variables in a single query.
to_upsert: list[FormVariable] = []
for step in affected_form_steps:

# We check which form variables actually need to be updated or inserted. If
# naively sending everything to the UPSERT we are sending pointless data to
# Postgres which puts unnecessary load.
existing_form_variables = (
self.filter(form_definition=form_definition).order_by("form_id").iterator()
)
# keep track of (form_id, form_key) tuples that were considered, so that we can
# efficiently decide whether we can ignore it or not based on existing variables.
seen: set[tuple[int, str]] = set()

# first look at form variables that already exist since we can exclude those
# from the upsert
form_variables_by_form = itertools.groupby(
existing_form_variables, key=lambda fv: fv.form_id
)
for form_id, variables in form_variables_by_form:
assert form_id is not None
variables_by_key = {variable.key: variable for variable in variables}

def _add_variable(variable: FormVariable):
form_specific_variable = copy(variable)
form_specific_variable.form_id = form_id
to_upsert.append(form_specific_variable)

# check whether we need to create or update the variable by comparing against
# existing variables.
for desired_variable in desired_variables:
existing_variable = variables_by_key.get(key := desired_variable.key)
seen.add((form_id, key))

if existing_variable is None:
_add_variable(desired_variable)
continue

# otherwise, check if we need to update or can skip this variable to
# make the upsert query smaller
if not existing_variable.matches(desired_variable):
# it needs to be updated
_add_variable(desired_variable)

# Finally, process variables that don't exist yet at all
for form_id in affected_forms:
for variable in desired_variables:
form_specific_variable = deepcopy(variable)
form_specific_variable.form = step.form
_lookup = (form_id, variable.key)
# it already exists and has been processed
if _lookup in seen:
continue

# it doesn't exist and needs to be created
form_specific_variable = copy(variable)
form_specific_variable.form_id = form_id
to_upsert.append(form_specific_variable)

self.bulk_create(
to_upsert,
# enables UPSERT behaviour so that existing records get updated and missing
# records inserted
update_conflicts=True,
update_fields=(
"prefill_plugin",
"prefill_attribute",
"prefill_identifier_role",
"name",
"is_sensitive_data",
"source",
"data_type",
"initial_value",
),
update_fields=UPSERT_ATTRIBUTES_TO_COMPARE + ("source",),
unique_fields=("form", "key"),
)

Expand All @@ -159,6 +218,7 @@ class FormVariable(models.Model):
help_text=_("Form to which this variable is related"),
on_delete=models.CASCADE,
)
form_id: int | None
form_definition = models.ForeignKey(
to=FormDefinition,
verbose_name=_("form definition"),
Expand Down Expand Up @@ -303,6 +363,11 @@ class Meta:
def __str__(self):
return _("Form variable '{key}'").format(key=self.key)

def save(self, *args, **kwargs):
self.check_data_type_and_initial_value()

super().save(*args, **kwargs)

@property
def json_schema(self) -> JSONObject | None:
return self._json_schema
Expand Down Expand Up @@ -355,7 +420,30 @@ def check_data_type_and_initial_value(self):
if self.source == FormVariableSources.user_defined:
self.initial_value = check_initial_value(self.initial_value, self.data_type)

def save(self, *args, **kwargs):
self.check_data_type_and_initial_value()
def matches(self, other: FormVariable) -> bool:
"""
Check if the other form variable matches this instance.
super().save(*args, **kwargs)
Matching can only be performed on component variables to check if they are
still in sync. Foreign key relations to form etc. are ignored as this doesn't
contain semantic information.
"""
assert (
self.source == FormVariableSources.component
), "Can only compare component variables"
assert (
other.source == FormVariableSources.component
), "Can only compare component variables"
assert self.key == other.key, (
"Different keys are being compared, are you sure you're comparing "
"the right instances?"
)

# these are the attributes that are derived from the matching formio component,
# see FormVariableManager.synchronize_for. Other attributes are relational or
# related to user defined variables (like service fetch, prefill options...).
all_equal = all(
getattr(self, attr) == getattr(other, attr)
for attr in UPSERT_ATTRIBUTES_TO_COMPARE
)
return all_equal
Loading

0 comments on commit 87c96f1

Please sign in to comment.