diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 39c986a6d..dcbf70135 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -61,9 +61,12 @@ jobs: repository: FAIRmat-NFDI/${{ matrix.plugin }} path: ${{ matrix.plugin }} ref: ${{ matrix.branch }} + - name: Install nomad + run: | + uv pip install --system nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@pluginise-nexus-code - name: Install ${{ matrix.plugin }} run: | - cd ${{ matrix.plugin }} + cd ${{ matrix.plugin }} uv pip install --system . - name: Run ${{ matrix.plugin }} tests run: | diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f49cda114..79ee5450c 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -30,9 +30,15 @@ jobs: run: | curl -LsSf https://astral.sh/uv/install.sh | sh uv pip install --system coverage coveralls - - name: Install package + - name: Install pynx without nomad plugin + if: "${{ matrix.python_version == '3.8' }}" run: | uv pip install --system ".[dev]" + - name: Install pynx with nomad plugin + if: "${{ matrix.python_version != '3.8' }}" + run: | + uv pip install --system nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@pluginise-nexus-code + uv pip install --system ".[dev]" - name: Test with pytest run: | coverage run -m pytest -sv --show-capture=no tests diff --git a/README.md b/README.md index e37c9fe9a..60def5dcb 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ pip install git+https://github.com/FAIRmat-NFDI/pynxtools.git `pynxtools` (previously called `nexusutils`) is intended as a parser for combining various instrument output formats and electronic lab notebook (ELN) formats to an hdf5 file according to NeXus application definitions. 
-Additionally, the software is used in the research data management system NOMAD for +Additionally, the software can be used as a plugin in the research data management system NOMAD for making experimental data searchable and publishable. NOMAD is developed by the FAIRMAT consortium, as a part of the German National Research Data Infrastructure (NFDI). @@ -49,6 +49,13 @@ data into the NeXus standard and visualising the files content. - [**read_nexus**](https://github.com/FAIRmat-NFDI/pynxtools/blob/master/src/pynxtools/nexus/README.md): Outputs a debug log for a given NeXus file. - [**generate_eln**](https://github.com/FAIRmat-NFDI/pynxtools/blob/master/src/pynxtools/eln_mapper/README.md): Outputs ELN files that can be used to add metadata to the dataconverter routine. +# NOMAD integration + +To use pynxtools with NOMAD, simply install it in the same environment as the `nomad-lab` package. +NOMAD will recognize pynxtools as a plugin automatically and offer automatic parsing of `.nxs` files +and a schema for NeXus application definitions. +pynxtools is already included in the NOMAD main deployment and NOMAD NeXus distribution images. + # Documentation Documentation for the different tools can be found [here](https://fairmat-nfdi.github.io/pynxtools/). @@ -111,7 +118,7 @@ on how to build on this work, or to get your parser included into NOMAD, you can ### Does this software require NOMAD or NOMAD OASIS ? -No. The data files produced here can be uploaded to Nomad. Therefore, this acts like the framework to design schemas and instances of data within the NeXus universe. +No. The data files produced here can be uploaded to NOMAD. Therefore, this acts like the framework to design schemas and instances of data within the NeXus universe. It can, however, be used as a NOMAD plugin to parse NeXus files; please see the section above for details. 
# Troubleshooting diff --git a/src/pynxtools/_build_wrapper.py b/_build_wrapper.py similarity index 93% rename from src/pynxtools/_build_wrapper.py rename to _build_wrapper.py index ababdb96f..6e2dac695 100644 --- a/src/pynxtools/_build_wrapper.py +++ b/_build_wrapper.py @@ -28,7 +28,9 @@ def get_vcs_version(tag_match="*[0-9]*") -> Optional[str]: "--match", tag_match, ], - cwd=os.path.join(os.path.dirname(__file__), "../pynxtools/definitions"), + cwd=os.path.join( + os.path.dirname(__file__), "src/pynxtools/definitions" + ), check=True, capture_output=True, ) diff --git a/dev-requirements.txt b/dev-requirements.txt index f318f68fe..2001db8ea 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -21,11 +21,7 @@ click==8.1.7 click-default-group==1.2.4 # via pynxtools (pyproject.toml) colorama==0.4.6 - # via - # click - # mkdocs - # mkdocs-material - # pytest + # via mkdocs-material contourpy==1.2.0 # via matplotlib coverage==7.4.4 @@ -59,7 +55,7 @@ jinja2==3.1.3 # mkdocs-material kiwisolver==1.4.5 # via matplotlib -lxml==5.1.0 +lxml==5.2.2 # via pynxtools (pyproject.toml) markdown==3.6 # via @@ -98,7 +94,7 @@ mypy-extensions==1.0.0 # via mypy nodeenv==1.8.0 # via pre-commit -numpy==1.26.4 +numpy==1.22.4 # via # pynxtools (pyproject.toml) # ase @@ -116,7 +112,7 @@ packaging==24.0 # xarray paginate==0.5.6 # via mkdocs-material -pandas==2.2.1 +pandas==1.5.3 # via # pynxtools (pyproject.toml) # xarray @@ -174,7 +170,9 @@ ruff==0.3.4 scipy==1.12.0 # via ase setuptools==70.0.0 - # via nodeenv + # via + # pynxtools (pyproject.toml) + # nodeenv six==1.16.0 # via # anytree @@ -196,8 +194,6 @@ types-requests==2.31.0.20240311 # via pynxtools (pyproject.toml) typing-extensions==4.10.0 # via mypy -tzdata==2024.1 - # via pandas urllib3==2.2.1 # via # requests @@ -208,7 +204,7 @@ virtualenv==20.25.1 # via pre-commit watchdog==4.0.0 # via mkdocs -xarray==2024.2.0 +xarray==2023.12.0 # via pynxtools (pyproject.toml) zipp==3.18.1 # via importlib-metadata diff --git 
a/pyproject.toml b/pyproject.toml index 8d9bdcdd2..39441f537 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = ["setuptools>=64.0.1", "setuptools-scm[toml]>=6.2"] -backend-path = ["src/pynxtools"] +backend-path = ["."] build-backend = "_build_wrapper" [project] @@ -35,7 +35,6 @@ dependencies = [ "importlib-metadata", "lxml>=4.9.1", "anytree", - "setuptools>=64.0.1" ] [project.urls] @@ -95,6 +94,7 @@ ellips = [ [project.entry-points.'nomad.plugin'] nexus_parser = "pynxtools.nomad.entrypoints:nexus_parser" nexus_schema = "pynxtools.nomad.entrypoints:nexus_schema" +nexus_data_converter = "pynxtools.nomad.entrypoints:nexus_data_converter" [project.scripts] read_nexus = "pynxtools.nexus.nexus:main" diff --git a/src/pynxtools/__init__.py b/src/pynxtools/__init__.py index 52a749e5b..1b7a222f1 100644 --- a/src/pynxtools/__init__.py +++ b/src/pynxtools/__init__.py @@ -19,13 +19,42 @@ import os import re from datetime import datetime +from subprocess import CalledProcessError, run +from typing import Optional -from pynxtools._build_wrapper import get_vcs_version from pynxtools.definitions.dev_tools.globals.nxdl import get_nxdl_version MAIN_BRANCH_NAME = "fairmat" +def get_vcs_version(tag_match="*[0-9]*") -> Optional[str]: + """ + The version of the Nexus standard and the NeXus Definition language + based on git tags and commits + """ + try: + return ( + run( + [ + "git", + "describe", + "--dirty", + "--tags", + "--long", + "--match", + tag_match, + ], + cwd=os.path.join(os.path.dirname(__file__), "../pynxtools/definitions"), + check=True, + capture_output=True, + ) + .stdout.decode("utf-8") + .strip() + ) + except (FileNotFoundError, CalledProcessError): + return None + + def _build_version(tag: str, distance: int, node: str, dirty: bool) -> str: """ Builds the version string for a given set of git states. 
diff --git a/src/pynxtools/dataconverter/helpers.py b/src/pynxtools/dataconverter/helpers.py index f221d6826..14cc92c6e 100644 --- a/src/pynxtools/dataconverter/helpers.py +++ b/src/pynxtools/dataconverter/helpers.py @@ -892,6 +892,12 @@ def update_and_warn(key: str, value: str, overwrite=False): get_nexus_version(), overwrite=False, ) + update_and_warn( + f"/ENTRY[{entry_name}]/definition/@URL", + "https://github.com/FAIRmat-NFDI/nexus_definitions/" + f"blob/{get_nexus_version_hash()}", + overwrite=False, + ) def extract_atom_types(formula, mode="hill"): diff --git a/src/pynxtools/dataconverter/nexus_tree.py b/src/pynxtools/dataconverter/nexus_tree.py index a631d7b4d..bbba22c09 100644 --- a/src/pynxtools/dataconverter/nexus_tree.py +++ b/src/pynxtools/dataconverter/nexus_tree.py @@ -37,7 +37,6 @@ from pynxtools.dataconverter.helpers import ( contains_uppercase, get_all_parents_for, - get_nxdl_name_for, get_nxdl_root_and_path, is_appdef, remove_namespace_from_tag, @@ -134,6 +133,14 @@ class NexusNode(NodeMixin): for a tree, i.e., setting the parent of a node is enough to add it to the tree and to its parent's children. For the root this is None. + is_a: List["NexusNode"]: + A list of NexusNodes the current node represents. + This is used for attaching siblings to the current node, e.g., + if the parent appdef has a field `DATA(NXdata)` and the current appdef + has a field `my_data(NXdata)` the relation `my_data` `is_a` `DATA` is set. + parent_of: List["NexusNode"]: + The inverse of the above `is_a`. In the example case + `DATA` `parent_of` `my_data`. """ name: str @@ -217,37 +224,68 @@ def get_path(self) -> str: current_node = current_node.parent return "/" + "/".join(names) - def search_child_with_name( - self, names: Union[Tuple[str, ...], str] + def search_add_child_for_multiple( + self, names: Tuple[str, ...] ) -> Optional["NexusNode"]: """ - This searches a child or children with `names` in the current node. 
+ Searches and adds a child with one of the names in `names` to the current node. + This calls `search_add_child_for` repeatedly until a child is found. + The found child is then returned. + + Args: + names (Tuple[str, ...]): + A tuple of names of the child to search for. + + Returns: + Optional["NexusNode"]: + The first matching NexusNode for the child name. + If no child is found at all None is returned. + """ + for name in names: + child = self.search_add_child_for(name) + if child is not None: + return child + return None + + def search_add_child_for(self, name: str) -> Optional["NexusNode"]: + """ + This searches a child with name `name` in the current node. If the child is not found as a direct child, it will search in the inheritance chain and add the child to the tree. Args: - names (Union[Tuple[str, ...], str]): - Either a single string or a tuple of string. - In case this is a string the child with the specific name is searched. - If it is a tuple, the first match is used. + name (str): + Name of the child to search for. Returns: Optional[NexusNode]: The node of the child which was added. None if no child was found. 
""" - if isinstance(names, str): - names = (names,) - for name in names: - direct_child = next((x for x in self.children if x.name == name), None) - if direct_child is not None: - return direct_child - if name in self.get_all_direct_children_names(): - return self.add_inherited_node(name) + tags = ( + "*[self::nx:field or self::nx:group " + "or self::nx:attribute or self::nx:choice]" + ) + for elem in self.inheritance: + xml_elem = elem.xpath( + f"{tags}[@name='{name}']", + namespaces=namespaces, + ) + if not xml_elem and name.isupper(): + xml_elem = elem.xpath( + f"{tags}[@type='NX{name.lower()}' and not(@name)]", + namespaces=namespaces, + ) + if not xml_elem: + continue + existing_child = self.get_child_for(xml_elem[0]) + if existing_child is None: + return self.add_node_from(xml_elem[0]) + return existing_child return None - def get_children_for(self, xml_elem: ET._Element) -> Optional["NexusNode"]: + def get_child_for(self, xml_elem: ET._Element) -> Optional["NexusNode"]: """ - Get the children of the current node which matches xml_elem. + Get the child of the current node, which matches xml_elem. Args: xml_elem (ET._Element): The xml element to search in the children. @@ -257,7 +295,10 @@ def get_children_for(self, xml_elem: ET._Element) -> Optional["NexusNode"]: The NexusNode containing the children. None if there is no initialised children for the xml_node. 
""" - return next((x for x in self.children if x.inheritance[0] == xml_elem), None) + for child in self.children: + if child.inheritance and child.inheritance[0] == xml_elem: + return child + return None def get_all_direct_children_names( self, @@ -618,22 +659,17 @@ def _check_sibling_namefit(self): if get_nx_namefit(self.name, sibling_name) < 0: continue - sibling_node = self.parent.get_children_for(sibling) + sibling_node = self.parent.get_child_for(sibling) if sibling_node is None: sibling_node = self.parent.add_node_from(sibling) self.is_a.append(sibling_node) sibling_node.parent_of.append(self) min_occurs = ( - (1 if self.optionality == "required" else 0) + (1 if sibling_node.optionality == "required" else 0) if sibling_node.occurrence_limits[0] is None else sibling_node.occurrence_limits[0] ) - min_occurs = ( - 1 - if self.optionality == "required" and min_occurs < 1 - else min_occurs - ) required_children = reduce( lambda x, y: x + (1 if y.optionality == "required" else 0), @@ -641,8 +677,15 @@ def _check_sibling_namefit(self): 0, ) - if required_children >= min_occurs: - self.optionality = "optional" + if ( + sibling_node.optionality == "required" + and required_children >= min_occurs + ): + sibling_node.optionality = "optional" + break + else: + continue + break def _set_occurence_limits(self): """ @@ -817,7 +860,7 @@ def populate_tree_from_parents(node: NexusNode): The current node from which to populate the tree. 
""" for child in node.get_all_direct_children_names(only_appdef=True): - child_node = node.search_child_with_name(child) + child_node = node.search_add_child_for(child) populate_tree_from_parents(child_node) diff --git a/src/pynxtools/dataconverter/validation.py b/src/pynxtools/dataconverter/validation.py index 2f100a9d7..63a715f6a 100644 --- a/src/pynxtools/dataconverter/validation.py +++ b/src/pynxtools/dataconverter/validation.py @@ -25,7 +25,6 @@ import h5py import lxml.etree as ET import numpy as np -from anytree import Resolver from pynxtools.dataconverter.helpers import ( Collector, @@ -236,14 +235,14 @@ def check_nxdata(): # if the concept for signal is already defined in the appdef # TODO: This appends the base class multiple times # it should be done only once - data_node = node.search_child_with_name((signal, "DATA")) - data_bc_node = node.search_child_with_name("DATA") + data_node = node.search_add_child_for_multiple((signal, "DATA")) + data_bc_node = node.search_add_child_for("DATA") data_node.inheritance.append(data_bc_node.inheritance[0]) for child in data_node.get_all_direct_children_names(): - data_node.search_child_with_name(child) + data_node.search_add_child_for(child) handle_field( - node.search_child_with_name((signal, "DATA")), + node.search_add_child_for_multiple((signal, "DATA")), keys, prev_path=prev_path, ) @@ -268,14 +267,14 @@ def check_nxdata(): # if the concept for the axis is already defined in the appdef # TODO: This appends the base class multiple times # it should be done only once - axis_node = node.search_child_with_name((axis, "AXISNAME")) - axis_bc_node = node.search_child_with_name("AXISNAME") + axis_node = node.search_add_child_for_multiple((axis, "AXISNAME")) + axis_bc_node = node.search_add_child_for("AXISNAME") axis_node.inheritance.append(axis_bc_node.inheritance[0]) for child in axis_node.get_all_direct_children_names(): - axis_node.search_child_with_name(child) + axis_node.search_add_child_for(child) handle_field( - 
node.search_child_with_name((axis, "AXISNAME")), + node.search_add_child_for_multiple((axis, "AXISNAME")), keys, prev_path=prev_path, ) @@ -515,7 +514,7 @@ def is_documented(key: str, node: NexusNode) -> bool: if best_name is None: return False - node = node.search_child_with_name(best_name) + node = node.search_add_child_for(best_name) if isinstance(mapping[key], dict) and "link" in mapping[key]: # TODO: Follow link and check consistency with current field @@ -620,7 +619,7 @@ def populate_full_tree(node: NexusNode, max_depth: Optional[int] = 5, depth: int # be fixed. return for child in node.get_all_direct_children_names(): - child_node = node.search_child_with_name(child) + child_node = node.search_add_child_for(child) populate_full_tree(child_node, max_depth=max_depth, depth=depth + 1) diff --git a/src/pynxtools/definitions b/src/pynxtools/definitions index 0682943ba..a4a31dfaa 160000 --- a/src/pynxtools/definitions +++ b/src/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 0682943baaef54d4a6386b5433f9721af6d3d81b +Subproject commit a4a31dfaac1c153d0ba49be7ba388404d13d9eea diff --git a/src/pynxtools/nexus-version.txt b/src/pynxtools/nexus-version.txt index 810c811ae..4cdee0882 100644 --- a/src/pynxtools/nexus-version.txt +++ b/src/pynxtools/nexus-version.txt @@ -1 +1 @@ -v2022.07-1199-g0682943ba \ No newline at end of file +v2022.07-1204-ga4a31dfaa \ No newline at end of file diff --git a/src/pynxtools/nomad/dataconverter.py b/src/pynxtools/nomad/dataconverter.py new file mode 100644 index 000000000..b34b10b2b --- /dev/null +++ b/src/pynxtools/nomad/dataconverter.py @@ -0,0 +1,240 @@ +import os.path +import re +from typing import Optional + +import numpy as np +import yaml + +try: + from nomad.datamodel.data import EntryData + from nomad.metainfo import MEnum, Package, Quantity + from nomad.units import ureg +except ImportError as exc: + raise ImportError( + "Could not import nomad package. Please install the package 'nomad-lab'." 
+ ) from exc + +from pynxtools.dataconverter import convert as pynxtools_converter +from pynxtools.dataconverter import writer as pynxtools_writer +from pynxtools.dataconverter.template import Template +from pynxtools.definitions.dev_tools.utils.nxdl_utils import ( + get_app_defs_names, # pylint: disable=import-error +) + +m_package = Package(name="nexus_data_converter") + + +def create_eln_dict(archive): + def transform(quantity_def, section, value, path): + if quantity_def.unit: + Q_ = ureg.Quantity + val_unit = Q_(value, quantity_def.unit) + + default_display_unit = quantity_def.m_annotations.get( + "eln", {"defaultDisplayUnit": None} + ).defaultDisplayUnit + if default_display_unit: + val_unit = val_unit.to(default_display_unit) + + return dict( + value=val_unit.magnitude.tolist() + if isinstance(val_unit.magnitude, np.ndarray) + else val_unit.magnitude, + unit=str(format(val_unit.units, "~")), + ) + return value + + def exclude(quantity_def, section): + return quantity_def.name in ("reader", "input_files", "output", "nxdl") + + eln_dict = archive.m_to_dict(transform=transform, exclude=exclude) + del eln_dict["data"]["m_def"] + + return eln_dict + + +def write_yaml(archive, filename, eln_dict): + with archive.m_context.raw_file(filename, "w") as eln_file: + yaml.dump(eln_dict["data"], eln_file, allow_unicode=True) + + +def populate_nexus_subsection( + template: "Template", + app_def: str, + archive, + logger, + output_file_path: Optional[str] = None, + on_temp_file=False, +): + """Populate nexus subsection in nomad from nexus template. + + There are three ways to populate nexus subsection from nexus template. + 1. First it writes a nexus file (.nxs), then the nexus subsection will be populated from + that file. + 2. First it writes the data in hdf5 datamodel (in a file in memory), later the nexus + subsection will be populated from that in-memory file. + 3. (This is not yet done.) It directly populates the nexus subsection from the template. 
+ + Args: + template: Nexus template. + app_def: Name of application def NXxrd_pan. + archive: EntryArchive section. + output_file_path: Output file path; should be a relative path, not an absolute path. + logger: nomad logger. + on_temp_file: Whether data will be written to a temporary file on disk, by default False. + + Raises: + Exception: could not trigger processing from NexusParser + Exception: could not trigger processing from NexusParser + """ + _, nxdl_f_path = pynxtools_converter.helpers.get_nxdl_root_and_path(app_def) + + # Writing nxs file, parse and populate NeXus subsection: + if output_file_path: + archive.data.output = os.path.join( + archive.m_context.raw_path(), output_file_path + ) + pynxtools_writer.Writer( + data=template, nxdl_f_path=nxdl_f_path, output_path=archive.data.output + ).write() + try: + from nomad.parsing.nexus.nexus import NexusParser + + nexus_parser = NexusParser() + nexus_parser.parse( + mainfile=archive.data.output, archive=archive, logger=logger + ) + try: + archive.m_context.process_updated_raw_file( + output_file_path, allow_modify=True + ) + except Exception as e: + logger.error( + "could not trigger processing", + mainfile=archive.data.output, + exc_info=e, + ) + raise e + else: + logger.info("triggered processing", mainfile=archive.data.output) + except Exception as e: + logger.error("could not trigger processing", exc_info=e) + raise e + + # Write in temporary file and populate the NeXus section. + elif not output_file_path or on_temp_file: + output_file = "temp_file.nxs" + output_file = os.path.join(archive.m_context.raw_path(), output_file) + logger.info( + "No output NeXus file is found and data is being written temporary file." 
+ ) + try: + pynxtools_writer.Writer( + data=template, nxdl_f_path=nxdl_f_path, output_path=output_file + ).write() + + from nomad.parsing.nexus.nexus import NexusParser + + nexus_parser = NexusParser() + nexus_parser.parse(mainfile=output_file, archive=archive, logger=logger) + # Ensure no local reference with the hdf5file + except Exception as e: + logger.error("could not trigger processing", exc_info=e) + raise e + finally: + if os.path.isfile(output_file): + os.remove(output_file) + + +class ElnYamlConverter(EntryData): + output = Quantity( + type=str, + description="Output yaml file to save all the data. Default: eln_data.yaml", + a_eln=dict(component="StringEditQuantity"), + a_browser=dict(adaptor="RawFileAdaptor"), + default="eln_data.yaml", + ) + + def normalize(self, archive, logger): + super(ElnYamlConverter, self).normalize(archive, logger) + + eln_dict = create_eln_dict(archive) + write_yaml(archive, archive.data.output, eln_dict) + + +class NexusDataConverter(EntryData): + reader = Quantity( + type=MEnum(sorted(list(set(pynxtools_converter.get_names_of_all_readers())))), + description="The reader needed to run the Nexus converter.", + a_eln=dict(component="AutocompleteEditQuantity"), + ) + + nxdl = Quantity( + type=MEnum(sorted(list(set(get_app_defs_names())))), + description="The nxdl needed for running the Nexus converter.", + a_eln=dict(component="AutocompleteEditQuantity"), + ) + + input_files = Quantity( + type=str, + shape=["*"], + description="Input files needed to run the nexus converter.", + a_eln=dict(component="FileEditQuantity"), + a_browser=dict(adaptor="RawFileAdaptor"), + ) + + output = Quantity( + type=str, + description="Output Nexus filename to save all the data. 
Default: output.nxs", + a_eln=dict(component="StringEditQuantity"), + a_browser=dict(adaptor="RawFileAdaptor"), + default="output.nxs", + ) + + def normalize(self, archive, logger): + super(NexusDataConverter, self).normalize(archive, logger) + + raw_path = archive.m_context.raw_path() + eln_dict = create_eln_dict(archive) + + if archive.data.input_files is None: + archive.data.input_files = [] + + if len(eln_dict["data"]) > 0: + write_yaml(archive, "eln_data.yaml", eln_dict) + + if "eln_data.yaml" not in archive.data.input_files: + archive.data.input_files.append("eln_data.yaml") + + converter_params = { + "reader": archive.data.reader, + "nxdl": re.sub(".nxdl$", "", archive.data.nxdl), + "input_file": [ + os.path.join(raw_path, file) for file in archive.data.input_files + ], + "output": os.path.join(raw_path, archive.data.output), + } + try: + pynxtools_converter.logger = logger + pynxtools_converter.helpers.logger = logger + pynxtools_converter.convert(**converter_params) + except Exception as e: + logger.error( + "could not convert to nxs", mainfile=archive.data.output, exc_info=e + ) + raise e + + try: + archive.m_context.process_updated_raw_file( + archive.data.output, allow_modify=True + ) + except Exception as e: + logger.error( + "could not trigger processing", mainfile=archive.data.output, exc_info=e + ) + raise e + else: + logger.info("triggered processing", mainfile=archive.data.output) + + +m_package.__init_metainfo__() diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index 6a5b6f27a..2c8003135 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -1,4 +1,9 @@ -from nomad.config.models.plugins import ParserEntryPoint, SchemaPackageEntryPoint +try: + from nomad.config.models.plugins import ParserEntryPoint, SchemaPackageEntryPoint +except ImportError as exc: + raise ImportError( + "Could not import nomad package. Please install the package 'nomad-lab'." 
+ ) from exc class NexusParserEntryPoint(ParserEntryPoint): @@ -15,11 +20,25 @@ def load(self): return nexus_metainfo_package +class NexusDataConverterEntryPoint(SchemaPackageEntryPoint): + def load(self): + from pynxtools.nomad.dataconverter import m_package + + return m_package + + nexus_schema = NexusSchemaEntryPoint( name="NeXus", description="The NeXus metainfo package.", ) + +nexus_data_converter = NexusDataConverterEntryPoint( + name="NeXus Dataconverter", + description="The NeXus dataconverter to convert data into the NeXus format.", +) + + nexus_parser = NexusParserEntryPoint( name="pynxtools parser", description="A parser for nexus files.", diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 3bb0fb4ff..4869bb466 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -16,21 +16,27 @@ # limitations under the License. # -from typing import Optional, Set +from typing import Dict, Optional, Set import lxml.etree as ET import numpy as np -from ase.data import chemical_symbols -from nomad.atomutils import Formula -from nomad.datamodel import EntryArchive -from nomad.datamodel.results import Material, Results -from nomad.metainfo import MSection, nexus -from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name -from nomad.parsing import Parser -from nomad.units import ureg -from nomad.utils import get_logger -from pint.errors import UndefinedUnitError +try: + from nomad.atomutils import Formula + from nomad.datamodel import EntryArchive + from nomad.datamodel.results import Material, Results + from nomad.metainfo import MSection + from nomad.metainfo.util import MQuantity, MSubSectionList, resolve_variadic_name + from nomad.parsing import MatchingParser + from nomad.units import ureg + from nomad.utils import get_logger + from pint.errors import UndefinedUnitError +except ImportError as exc: + raise ImportError( + "Could not import nomad package. 
Please install the package 'nomad-lab'." + ) from exc + +import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus @@ -114,12 +120,13 @@ def _get_value(hdf_node): return hdf_node[()].decode() -class NexusParser(Parser): +class NexusParser(MatchingParser): """ NexusParser doc """ - def __init__(self): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.archive: Optional[EntryArchive] = None self.nx_root = None self._logger = None @@ -453,10 +460,14 @@ def normalize_chemical_formula(self, chemical_formulas) -> None: self._logger.warn("could not normalize material", exc_info=e) def parse( - self, mainfile: str, archive: EntryArchive, logger=None, child_archives=None - ): + self, + mainfile: str, + archive: EntryArchive, + logger=None, + child_archives: Dict[str, EntryArchive] = None, + ) -> None: self.archive = archive - self.archive.m_create(nexus.NeXus) # type: ignore # pylint: disable=no-member + self.archive.m_create(nexus_schema.NeXus) # type: ignore # pylint: disable=no-member self.nx_root = self.archive.nexus self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index f6fc75998..a2bae966d 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -26,32 +26,38 @@ from typing import Dict, List, Optional, Union import numpy as np -from nomad.datamodel import EntryArchive -from nomad.metainfo import ( - Attribute, - Bytes, - Datetime, - Definition, - MEnum, - Package, - Quantity, - Section, - SubSection, -) -from nomad.metainfo.data_type import ( - Bytes, - Datatype, - Datetime, - Number, - m_bool, - m_complex128, - m_float64, - m_int, - m_int64, - m_str, -) -from nomad.utils import get_logger, strip -from toposort import toposort_flatten + +try: + from nomad.datamodel import EntryArchive + from nomad.metainfo import ( + Attribute, + Bytes, + Datetime, + Definition, + MEnum, + 
Package, + Quantity, + Section, + SubSection, + ) + from nomad.metainfo.data_type import ( + Bytes, + Datatype, + Datetime, + Number, + m_bool, + m_complex128, + m_float64, + m_int, + m_int64, + m_str, + ) + from nomad.utils import get_logger, strip + from toposort import toposort_flatten +except ImportError as exc: + raise ImportError( + "Could not import nomad package. Please install the package 'nomad-lab'." + ) from exc from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path diff --git a/src/pynxtools/testing/nexus_conversion.py b/src/pynxtools/testing/nexus_conversion.py index 2f111b81c..9e368c8a7 100644 --- a/src/pynxtools/testing/nexus_conversion.py +++ b/src/pynxtools/testing/nexus_conversion.py @@ -5,6 +5,14 @@ from glob import glob from typing import Literal +try: + from nomad.client import parse + + NOMAD_AVAILABLE = True +except ImportError: + NOMAD_AVAILABLE = False + + from pynxtools.dataconverter.convert import get_reader, transfer_data_into_template from pynxtools.dataconverter.helpers import get_nxdl_root_and_path from pynxtools.dataconverter.validation import validate_dict_against @@ -109,8 +117,23 @@ def convert_to_nexus( Writer(read_data, nxdl_file, self.created_nexus).write() + if NOMAD_AVAILABLE: + kwargs = dict( + strict=True, + parser_name=None, + server_context=False, + username=None, + password=None, + ) + + parse(self.created_nexus, **kwargs) + def check_reproducibility_of_nexus(self): """Reproducibility test for the generated nexus file.""" + IGNORE_LINES = [ + "DEBUG - value: v", + "DEBUG - value: https://github.com/FAIRmat-NFDI/nexus_definitions/blob/", + ] ref_log = get_log_file(self.ref_nexus_file, "ref_nexus.log", self.tmp_path) gen_log = get_log_file(self.created_nexus, "gen_nexus.log", self.tmp_path) with open(gen_log, "r", encoding="utf-8") as gen, open( @@ -122,12 +145,12 @@ def check_reproducibility_of_nexus(self): assert False, "Log files are different" for ind, (gen_l, ref_l) in 
enumerate(zip(gen_lines, ref_lines)): if gen_l != ref_l: - # skip version conflicts - if gen_l.startswith("DEBUG - value: v") and ref_l.startswith( - "DEBUG - value: v" - ): - continue - assert False, ( - f"Log files are different at line {ind}" - f" generated: {gen_l} \n referenced : {ref_l}" - ) + # skip ignored lines (mainly version conflicts) + for ignore_line in IGNORE_LINES: + if gen_l.startswith(ignore_line) and ref_l.startswith(ignore_line): + break + else: + assert False, ( + f"Log files are different at line {ind}" + f" generated: {gen_l} \n referenced : {ref_l}" + ) diff --git a/tests/data/nexus/Ref_nexus_test.log b/tests/data/nexus/Ref_nexus_test.log index 0f018879d..b7b697d27 100644 --- a/tests/data/nexus/Ref_nexus_test.log +++ b/tests/data/nexus/Ref_nexus_test.log @@ -1253,18 +1253,17 @@ DEBUG - documentation (NXinstrument.nxdl.xml:/MONOCHROMATOR): DEBUG - DEBUG - documentation (NXmonochromator.nxdl.xml:): DEBUG - - A wavelength defining device. - - This is a base class for everything which - selects a wavelength or energy, be it a - monochromator crystal, a velocity selector, - an undulator or whatever. - - The expected units are: - - * wavelength: angstrom - * energy: eV - + A wavelength defining device. + + This is a base class for everything which + selects a wavelength or energy, be it a + monochromator crystal, a velocity selector, + an undulator or whatever. 
+ + The expected units are: + + * wavelength: angstrom + * energy: eV DEBUG - ===== ATTRS (//entry/instrument/monochromator@NX_class) DEBUG - value: NXmonochromator @@ -1297,7 +1296,9 @@ NXmonochromator.nxdl.xml:/energy_error DEBUG - <> DEBUG - DEPRECATED - see https://github.com/nexusformat/definitions/issues/820 DEBUG - documentation (NXmonochromator.nxdl.xml:/energy_error): -DEBUG - energy standard deviation +DEBUG - + energy standard deviation + DEBUG - ===== ATTRS (//entry/instrument/monochromator/energy_error@units) DEBUG - value: eV DEBUG - classpath: ['NXentry', 'NXinstrument', 'NXmonochromator', 'NX_FLOAT'] diff --git a/tests/data/parser/SiO2onSi.ellips.nxs b/tests/data/parser/SiO2onSi.ellips.nxs new file mode 100644 index 000000000..dcb645cd9 Binary files /dev/null and b/tests/data/parser/SiO2onSi.ellips.nxs differ diff --git a/tests/dataconverter/test_convert.py b/tests/dataconverter/test_convert.py index 3e40d27da..9bf32826a 100644 --- a/tests/dataconverter/test_convert.py +++ b/tests/dataconverter/test_convert.py @@ -17,22 +17,20 @@ # """Test cases for the convert script used to access the DataConverter.""" -import logging import os +import shutil from pathlib import Path -import click import h5py import pynxtools.dataconverter.convert as dataconverter import pytest from click.testing import CliRunner from pynxtools.dataconverter.readers.base.reader import BaseReader -from setuptools import distutils def move_xarray_file_to_tmp(tmp_path): """Moves the xarray file, which is used to test linking into the tmp_path directory.""" - distutils.file_util.copy_file( + shutil.copy( os.path.join( os.getcwd(), "src", "pynxtools", "data", "xarray_saved_small_calibration.h5" ), @@ -47,7 +45,7 @@ def restore_xarray_file_from_tmp(tmp_path): os.getcwd(), "src", "pynxtools", "data", "xarray_saved_small_calibration.h5" ) ) - distutils.file_util.move_file( + shutil.move( os.path.join(tmp_path, "xarray_saved_small_calibration.h5"), os.path.join( os.getcwd(), "src", 
"pynxtools", "data", "xarray_saved_small_calibration.h5" diff --git a/tests/dataconverter/test_helpers.py b/tests/dataconverter/test_helpers.py index 0401862db..32582b5e2 100644 --- a/tests/dataconverter/test_helpers.py +++ b/tests/dataconverter/test_helpers.py @@ -21,10 +21,10 @@ import os import xml.etree.ElementTree as ET from typing import Optional +import shutil import numpy as np import pytest -from setuptools import distutils from pynxtools.dataconverter import helpers from pynxtools.dataconverter.template import Template @@ -148,7 +148,7 @@ def fixture_filled_test_data(template, tmp_path): # Copy original measurement file to tmp dir, # because h5py.ExternalLink is modifying it while # linking the nxs file. - distutils.file_util.copy_file( + shutil.copy( os.path.join( os.getcwd(), "src", "pynxtools", "data", "xarray_saved_small_calibration.h5" ), diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py new file mode 100644 index 000000000..d93e82673 --- /dev/null +++ b/tests/nomad/test_parsing.py @@ -0,0 +1,166 @@ +"""This is a code that performs several tests on nexus tool""" + +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import sys + +import pytest + +if sys.version_info < (3, 9): + pytest.skip("nomad requires python3.9", allow_module_level=True) +else: + from nomad.datamodel import EntryArchive + from nomad.metainfo import Section + from nomad.units import ureg + from nomad.utils import get_logger + +from typing import Any + +from pynxtools.nomad.parser import NexusParser +from pynxtools.nomad.schema import nexus_metainfo_package + + +@pytest.mark.parametrize( + "path,value", + [ + pytest.param("name", "nexus"), + pytest.param("NXobject.name", "NXobject"), + pytest.param("NXentry.nx_kind", "group"), + pytest.param("NXdetector.real_time__field", "*"), + pytest.param("NXentry.DATA.nx_optional", True), + pytest.param("NXentry.DATA.nx_kind", "group"), + pytest.param("NXentry.DATA.nx_optional", True), + pytest.param("NXdetector.real_time__field.name", "real_time__field"), + pytest.param("NXdetector.real_time__field.nx_type", "NX_NUMBER"), + pytest.param("NXdetector.real_time__field.nx_units", "NX_TIME"), + pytest.param("NXarpes.ENTRY.DATA.nx_optional", False), + pytest.param("NXentry.nx_category", "base"), + pytest.param( + "NXdispersion_table.refractive_index__field.nx_type", "NX_COMPLEX" + ), + pytest.param( + "NXdispersive_material.ENTRY.dispersion_x." 
+ "DISPERSION_TABLE.refractive_index__field.nx_type", + "NX_COMPLEX", + ), + pytest.param("NXapm.nx_category", "application"), + ], +) +def test_assert_nexus_metainfo(path: str, value: Any): + """ + Test the existence of nexus metainfo + + + pytest.param('NXdispersive_material.inner_section_definitions[0].sub_sections[1].sub_section.inner_section_definitions[0].quantities[4].more["nx_type"] + + + + """ + current = nexus_metainfo_package + for name in path.split("."): + elements: list = [] + if name.endswith("__field"): + subelement_list = getattr(current, "quantities", None) + if subelement_list: + elements += subelement_list + else: + subelement_list = getattr(current, "section_definitions", None) + if subelement_list: + elements += subelement_list + subelement_list = getattr(current, "sub_sections", None) + if subelement_list: + elements += subelement_list + subelement_list = getattr(current, "attributes", None) + if subelement_list: + elements += subelement_list + subelement_list = current.m_contents() + if subelement_list: + elements += subelement_list + for content in elements: + if getattr(content, "name", None) == name: + current = content # type: ignore + if getattr(current, "sub_section", None): + current = current.section_definition + break + else: + current = getattr(current, name, None) + if current is None: + assert False, f"{path} does not exist" + + if value == "*": + assert current is not None, f"{path} does not exist" + elif value is None: + assert current is None, f"{path} does exist" + else: + assert current == value, f"{path} has wrong value" + + if isinstance(current, Section): + assert current.nx_kind is not None + for base_section in current.all_base_sections: + assert base_section.nx_kind == current.nx_kind + + +def test_nexus_example(): + archive = EntryArchive() + + example_data = "src/pynxtools/data/201805_WSe2_arpes.nxs" + NexusParser().parse(example_data, archive, get_logger(__name__)) + assert 
archive.nexus.NXarpes.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( + "3.27e-10*millibar" + ) + + instrument = archive.nexus.NXarpes.ENTRY[0].INSTRUMENT[0] + + assert instrument.nx_name == "instrument" + assert instrument.monochromator.energy__field == ureg.Quantity( + "36.49699020385742*electron_volt" + ) + assert instrument.analyser.entrance_slit_size__field == ureg.Quantity( + "750 micrometer" + ) + # good ENUM - x-ray + assert instrument.SOURCE[0].probe__field == "x-ray" + # wrong inherited ENUM - Burst + assert instrument.SOURCE[0].mode__field is None + # wrong inherited ENUM for extended field - 'Free Electron Laser' + assert instrument.SOURCE[0].type__field is None + + data = archive.nexus.NXarpes.ENTRY[0].DATA[0] + assert len(data.AXISNAME__field) == 3 + # there is still a bug in the variadic name resolution, so skip these + # assert data.delays__field is not None + # assert data.angles__field.check("1/Å") + # assert data.delays__field.check("fs") + # but the following still works + assert data.energies__field is not None + assert data.energies__field.check("eV") + # manual name resolution + assert data.AXISNAME__field["angles__field"] is not None + assert ( + data.AXISNAME__field["angles__field"].attributes["nx_data_max"] + == 2.168025463513032 + ) + assert (1 * data.AXISNAME__field["angles__field"].unit).check("1/Å") + assert (1 * data.AXISNAME__field["delays__field"].unit).check("fs") + + +def test_same_name_field_and_group(): + archive = EntryArchive() + example_data = "tests/data/parser/SiO2onSi.ellips.nxs" + NexusParser().parse(example_data, archive, get_logger(__name__)) + archive.m_to_dict(with_out_meta=True)