Merge pull request #531 from FAIRmat-NFDI/Bug_Attributes_Of_Nonexisting_Fields

Bug fix for adding attributes to non-existing fields being ignored
GinzburgLev authored Jan 31, 2025
2 parents 666a4ea + ada259b commit d3b15e2
Showing 7 changed files with 222 additions and 12 deletions.
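
A minimal sketch of the behavioural change (an editor's illustration, not part of the commit; the template keys are modelled on the test data in tests/dataconverter/test_validation.py): validate_dict_against now returns a tuple whose second element lists attribute keys whose parent field is missing from the template, and transfer_data_into_template deletes those keys before writing.

from pynxtools.dataconverter.validation import validate_dict_against

template = {
    # attribute is set, but the field .../float_value itself is missing
    "/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units": "nm",
}

valid, keys_to_remove = validate_dict_against("NXtest", template)
for key in keys_to_remove:
    # drop orphaned attributes so they are not written to the NeXus file
    del template[key]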
8 changes: 7 additions & 1 deletion src/pynxtools/dataconverter/convert.py
@@ -176,12 +176,18 @@ def transfer_data_into_template(
for entry_name in entry_names:
helpers.write_nexus_def_to_entry(data, entry_name, nxdl_name)
if not skip_verify:
valid = validate_dict_against(
valid, keys_to_remove = validate_dict_against(
nxdl_name,
data,
ignore_undocumented=ignore_undocumented,
)

# remove attributes that belong to non-existing fields

for key in keys_to_remove:
del data[key]

if fail and not valid:
raise ValidationFailed(
"The data does not match the given NXDL. "
3 changes: 3 additions & 0 deletions src/pynxtools/dataconverter/helpers.py
@@ -65,6 +65,7 @@ class ValidationProblem(Enum):
NXdataMissingSignalData = 17
NXdataMissingAxisData = 18
NXdataAxisMismatch = 19
KeyToBeRemoved = 20


class Collector:
@@ -139,6 +140,8 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
logger.warning(
f"Length of axis {path} does not match to {value} in dimension {args[0]}"
)
elif log_type == ValidationProblem.KeyToBeRemoved:
logger.warning(f"The attribute {path} will not be written.")

def collect_and_log(
self,
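
The new enum member is reported through the existing collector machinery. A minimal sketch of the resulting warning (assuming the module-level collector instance defined in helpers.py and the collect_and_log call pattern used in validation.py below; the path is hypothetical):

from pynxtools.dataconverter.helpers import ValidationProblem, collector

collector.collect_and_log(
    "/ENTRY[entry]/some_field/@units",  # hypothetical attribute path
    ValidationProblem.KeyToBeRemoved,
    None,
)
# expected warning: "The attribute /ENTRY[entry]/some_field/@units will not be written."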
183 changes: 180 additions & 3 deletions src/pynxtools/dataconverter/validation.py
@@ -166,7 +166,7 @@ def best_namefit_of(name: str, keys: Iterable[str]) -> Optional[str]:

def validate_dict_against(
appdef: str, mapping: Mapping[str, Any], ignore_undocumented: bool = False
) -> bool:
) -> Tuple[bool, List]:
"""
Validates a mapping against the NeXus tree for application definition `appdef`.
@@ -183,6 +183,7 @@
Returns:
bool: True if the mapping is valid according to `appdef`, False otherwise.
List: list of keys in mapping that correspond to attributes of non-existing fields
"""

def get_variations_of(node: NexusNode, keys: Mapping[str, Any]) -> List[str]:
@@ -550,6 +551,181 @@ def recurse_tree(
return
handling_map.get(child.type, handle_unknown_type)(child, keys, prev_path)

def check_attributes_of_nonexisting_field(
node: NexusNode,
) -> list:
"""
This method runs through the mapping dictionary and checks whether any
attributes are assigned to fields (not groups!) that are not themselves
explicitly present in the mapping.
If any are found, a warning is logged and the corresponding keys are
added to the list returned by the method.
Args:
node (NexusNode): the tree generated from the application definition.
Returns:
list: list of keys in mapping that correspond to attributes of
non-existing fields
"""

keys_to_remove = []

for key in mapping:
last_index = key.rfind("/")
if key[last_index + 1] == "@":
# key is an attribute. Find a corresponding parent, check all the other
# children of this parent
attribute_parent_checked = False
for key_iterating in mapping:
# check if key_iterating starts with the parent of the key OR any
# allowed variation of that parent
flag, extra_length = startswith_with_variations(
key_iterating, key[0:last_index]
)
# the variation of the path to the parent might have a different length
# than the key itself; extra_length adjusts for that
if flag:
if len(key_iterating) == last_index + extra_length:
# the parent is a field
attribute_parent_checked = True
break
elif (key_iterating[last_index + extra_length] == "/") and (
key_iterating[last_index + extra_length + 1] != "@"
):
# the parent is a group
attribute_parent_checked = True
break
if not attribute_parent_checked:
type_of_parent_from_tree = check_type_with_tree(
node, key[0:last_index]
)
# The parent can be a group with only attributes as children; check
# this in the tree built from the application definition. Alternatively,
# the parent may not be in the tree at all, in which case a group with
# only attributes is indistinguishable from a missing field. Continue
# without warnings or changes.
if not (
type_of_parent_from_tree == "group"
or type_of_parent_from_tree is None
):
keys_to_remove.append(key)
collector.collect_and_log(
key[0:last_index],
ValidationProblem.AttributeForNonExistingField,
None,
)
collector.collect_and_log(
key,
ValidationProblem.KeyToBeRemoved,
None,
)
return keys_to_remove

def check_type_with_tree(
node: NexusNode,
path: str,
) -> Optional[str]:
"""
Recursively search for the type of the object from the template
(described by path) using the subtree hanging below the node.
The path should be relative to the current node.
Args:
node (NexusNode): the subtree to search in.
path (str): the string addressing the object from the mapping (the
template) whose type is searched for.
Returns:
Optional[str]: the type of the object as a string if the object was
found in the tree, None otherwise.
"""

# already arrived at the object
if path == "":
return node.type
# searching for object among the children of the node
next_child_name_index = path[1:].find("/")
if (
next_child_name_index == -1
): # the whole path from element #1 is the child name
next_child_name_index = (
len(path) - 1
) # -1 because we count from element #1, not #0
next_child_class, next_child_name = split_class_and_name_of(
path[1 : next_child_name_index + 1]
)
if (next_child_class is not None) or (next_child_name is not None):
output = None
for child in node.children:
# regexes to separate the class and the name from the full name of the child
child_class_from_node = re.sub(
r"(\@.*)*(\[.*?\])*(\(.*?\))*([a-z]\_)*(\_[a-z])*[a-z]*\s*",
"",
child.__str__(),
)
child_name_from_node = re.sub(
r"(\@.*)*(\(.*?\))*(.*\[)*(\].*)*\s*",
"",
child.__str__(),
)
if (child_class_from_node == next_child_class) or (
child_name_from_node == next_child_name
):
output_new = check_type_with_tree(
child, path[next_child_name_index + 1 :]
)
if output_new is not None:
output = output_new
return output
else:
return None

def startswith_with_variations(
large_str: str, baseline_str: str
) -> Tuple[bool, int]:
"""
Recursively check whether large_str starts with baseline_str or an allowed
equivalent (e.g. .../AXISNAME[energy]/... matches .../energy/...).
Args:
large_str (str): the string to be checked.
baseline_str (str): the string used as a baseline for comparison.
Returns:
bool: True if large_str starts with baseline_str or equivalent, else False.
int: The combined length difference between baseline_str and the equivalent
part of the large_str.
"""
if len(baseline_str.split("/")) == 1:
# if baseline_str has no separators left, match already found
return (True, 0)
first_token_large_str = large_str.split("/")[1]
first_token_baseline_str = baseline_str.split("/")[1]

remaining_large_str = large_str[len(first_token_large_str) + 1 :]
remaining_baseline_str = baseline_str[len(first_token_baseline_str) + 1 :]
if first_token_large_str == first_token_baseline_str:
# exact match of n-th token
return startswith_with_variations(
remaining_large_str, remaining_baseline_str
)
match_check = re.search(r"\[.*?\]", first_token_large_str)
if match_check is None:
# tokens are different and do not contain []
return (False, 0)
variation_first_token_large = match_check.group(0)[1:-1]
if variation_first_token_large == first_token_baseline_str:
# equivalents match
extra_length_this_step = len(first_token_large_str) - len(
first_token_baseline_str
)
a, b = startswith_with_variations(
remaining_large_str, remaining_baseline_str
)
return (a, b + extra_length_this_step)
# default
return (False, 0)

missing_type_err = {
"field": ValidationProblem.MissingRequiredField,
"group": ValidationProblem.MissingRequiredGroup,
@@ -593,7 +769,8 @@ def recurse_tree(
not_visited_key, ValidationProblem.MissingDocumentation, None
)

return not collector.has_validation_problems()
keys_to_remove = check_attributes_of_nonexisting_field(tree)
return (not collector.has_validation_problems(), keys_to_remove)


def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int = 0):
@@ -627,4 +804,4 @@ def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int
def validate_data_dict(
_: Mapping[str, Any], read_data: Mapping[str, Any], root: ET._Element
) -> bool:
return validate_dict_against(root.attrib["name"], read_data)
return validate_dict_against(root.attrib["name"], read_data)[0]
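
The prefix comparison above treats concept names with a bracketed instance name as equivalent to the plain name. A standalone sketch of that per-token rule (an editor's illustration of the idea behind startswith_with_variations, not the library's helper):

import re

def token_matches(token: str, plain: str) -> bool:
    # "AXISNAME[energy]" is treated as equivalent to "energy"
    bracketed = re.search(r"\[(.*?)\]", token)
    return token == plain or (bracketed is not None and bracketed.group(1) == plain)

print(token_matches("AXISNAME[energy]", "energy"))  # True
print(token_matches("float_value", "energy"))       # False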
2 changes: 1 addition & 1 deletion src/pynxtools/testing/nexus_conversion.py
@@ -141,7 +141,7 @@ def convert_to_nexus(
with self.caplog.at_level(caplog_level):
_ = validate_dict_against(
self.nxdl, read_data, ignore_undocumented=ignore_undocumented
)
)[0]
assert self.caplog.text == ""

add_default_root_attributes(
6 changes: 3 additions & 3 deletions tests/dataconverter/test_helpers.py
@@ -536,7 +536,7 @@ def test_validate_data_dict(caplog, data_dict, error_message, request):
"required-field-provided-in-variadic-optional-group",
):
with caplog.at_level(logging.WARNING):
assert validate_dict_against("NXtest", data_dict)
assert validate_dict_against("NXtest", data_dict)[0]
assert caplog.text == ""
# Missing required fields caught by logger with warning
elif request.node.callspec.id in (
@@ -548,11 +548,11 @@
):
assert "" == caplog.text
captured_logs = caplog.records
assert not validate_dict_against("NXtest", data_dict)
assert not validate_dict_against("NXtest", data_dict)[0]
assert any(error_message in rec.message for rec in captured_logs)
else:
with caplog.at_level(logging.WARNING):
assert not validate_dict_against("NXtest", data_dict)
assert not validate_dict_against("NXtest", data_dict)[0]

assert error_message in caplog.text

2 changes: 1 addition & 1 deletion tests/dataconverter/test_readers.py
@@ -117,6 +117,6 @@ def test_has_correct_read_func(reader, caplog):
with caplog.at_level(logging.WARNING):
validate_dict_against(
supported_nxdl, read_data, ignore_undocumented=True
)
)[0]

print(caplog.text)
30 changes: 27 additions & 3 deletions tests/dataconverter/test_validation.py
@@ -28,6 +28,7 @@ def get_data_dict():
return {
"/ENTRY[my_entry]/optional_parent/required_child": 1,
"/ENTRY[my_entry]/optional_parent/optional_child": 1,
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr": 2.0,
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value": 2.0,
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units": "nm",
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value": True,
@@ -90,18 +91,41 @@ def alter_dict(new_values: Dict[str, Any], data_dict: Dict[str, Any]) -> Dict[st
pytest.param(get_data_dict(), id="valid-unaltered-data-dict"),
pytest.param(
remove_from_dict(
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", get_data_dict()
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value_no_attr",
get_data_dict(),
),
id="removed-optional-value",
),
],
)
def test_valid_data_dict(caplog, data_dict):
with caplog.at_level(logging.WARNING):
validate_dict_against("NXtest", data_dict)
assert validate_dict_against("NXtest", data_dict)[0]
assert caplog.text == ""


@pytest.mark.parametrize(
"data_dict, error_message_1, error_message_2",
[
pytest.param(
remove_from_dict(
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value", get_data_dict()
),
"The attribute /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value/@units will not be written.",
"There were attributes set for the field /ENTRY[my_entry]/NXODD_name[nxodd_name]/float_value, but the field does not exist.",
id="removed-optional-value-with-attribute-remaining",
),
],
)
def test_data_dict_attr_with_no_field(
caplog, data_dict, error_message_1, error_message_2
):
with caplog.at_level(logging.WARNING):
assert not validate_dict_against("NXtest", data_dict)[0]
assert error_message_1 in caplog.text
assert error_message_2 in caplog.text


@pytest.mark.parametrize(
"data_dict, error_message",
[
@@ -116,6 +140,6 @@ def test_valid_data_dict(caplog, data_dict):
)
def test_validation_shows_warning(caplog, data_dict, error_message):
with caplog.at_level(logging.WARNING):
assert not validate_dict_against("NXtest", data_dict)
assert not validate_dict_against("NXtest", data_dict)[0]

assert error_message in caplog.text
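
The new case can be run in isolation with pytest's keyword filter, for example:

pytest tests/dataconverter/test_validation.py -k attr_with_no_field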
