From 087dea39cf7f7061d080a89190fdb77bbecec4b8 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 09:14:29 +0100 Subject: [PATCH 01/45] add click arguments and options to main function --- mars-cli/mars_cli.py | 96 ++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index d615b31..c4c1af4 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -7,8 +7,9 @@ handle_input_dict, input_json_schema_filepath, ) -import argparse -from argparse import RawTextHelpFormatter +import click +import logging +from mars_lib.isa_json import TargetRepository def create_external_references( @@ -22,17 +23,25 @@ def create_external_references( biosamples_externalReferences: Dictionary containing the mapping between the production: Boolean indicating the environment of BioSamples to use. """ + if production: + logger_name = "production" + biosamples_endpoint = biosamples_endpoints["prod"] + else: + logger_name = "development" + biosamples_endpoint = biosamples_endpoints["dev"] + + logging.basicConfig( + filename=logger_name + ".log", + filemode="w", + format="%(name)s - %(levelname)s - %(message)s", + ) + validate_json_against_schema( json_doc=biosamples_externalReferences, json_schema=input_json_schema_filepath ) token = get_webin_auth_token(biosamples_credentials) header = get_header(token) - if production: - biosamples_endpoint = biosamples_endpoints["prod"] - else: - biosamples_endpoint = biosamples_endpoints["dev"] - for biosample_r in biosamples_externalReferences["biosampleExternalReferences"]: bs_accession = biosample_r["biosampleAccession"] BSrecord = BiosamplesRecord(bs_accession) @@ -44,40 +53,51 @@ def create_external_references( BSrecord.update_remote_record(header) -def main(): - """Main function that handles the argument parsing and passes those to `create_external_references`""" - # Command-line argument parsing - parser = 
argparse.ArgumentParser(description="Handle biosamples records.") - description = "This script extends a set of existing Biosamples records with a list of provided external references." - parser = argparse.ArgumentParser( - prog="biosamples-externalReferences.py", - description=description, - formatter_class=RawTextHelpFormatter, - ) - parser.add_argument( - "biosamples_credentials", - help="Either a dictionary or filepath to the BioSamples credentials.", - ) - parser.add_argument( - "biosamples_externalReferences", - help="Either a dictionary or filepath to the BioSamples' accessions mapping with external references.", - ) - parser.add_argument( - "--production", - action="store_true", - help="Boolean indicating the usage of the production environment of BioSamples. If not present, the development instance will be used.", - ) - # Handle inputs - parsed_args = parser.parse_args() - biosamples_credentials = handle_input_dict(parsed_args.biosamples_credentials) - biosamples_externalReferences = handle_input_dict( - parsed_args.biosamples_externalReferences +@click.group() +@click.option( + "--development", + is_flag=True, + help="Boolean indicating the usage of the development environment of the target repositories. 
If not present, the production instances will be used.", +) +def cli(development): + click.echo( + f"Running in {'Development environment' if development else 'Production environment'}" ) - create_external_references( - biosamples_credentials, biosamples_externalReferences, parsed_args.production + +@cli.command() +@click.argument( + "credentials_file", + type=click.File("r"), +) +@click.argument( + "isa_json_file", + type=click.File("r"), +) +@click.option("--submit-to-ena", type=click.BOOL, default=True, help="Submit to ENA.") +@click.option( + "--submit-to-metabolights", + type=click.BOOL, + default=True, + help="Submit to Metabolights.", +) +def submit(credentials_file, isa_json_file, submit_to_ena, submit_to_metabolights): + target_repositories = ["biosamples"] + if submit_to_ena: + target_repositories.append(TargetRepository.ENA) + + if submit_to_metabolights: + target_repositories.append(TargetRepository.METABOLIGHTS) + + click.echo( + f"Staring submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}." 
) +@cli.command() +def health_check(): + click.echo("Checking the health of the target repositories.") + + if __name__ == "__main__": - main() + cli() From ca0c459e69e25ca70449ac0178ef741a751ecebf Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 10:43:14 +0100 Subject: [PATCH 02/45] Generate settings file on install --- mars-cli/generate_config.py | 16 ++++++++++++++++ mars-cli/setup.py | 16 +++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 mars-cli/generate_config.py diff --git a/mars-cli/generate_config.py b/mars-cli/generate_config.py new file mode 100644 index 0000000..39dba0f --- /dev/null +++ b/mars-cli/generate_config.py @@ -0,0 +1,16 @@ +import configparser +import pathlib + +# Create settings file in user's home directory +settings_dir = pathlib.Path.home() / ".mars" +if not settings_dir.exists(): + settings_dir.mkdir() + +settings_path = settings_dir / "settings.ini" +log_path = settings_dir / "app.log" + +config = configparser.ConfigParser() +config["logging"] = {"log_level": "ERROR", "log_file": log_path} + +with open(settings_path, "w") as config_file: + config.write(config_file) diff --git a/mars-cli/setup.py b/mars-cli/setup.py index 69c1ffd..81a9011 100644 --- a/mars-cli/setup.py +++ b/mars-cli/setup.py @@ -1,6 +1,8 @@ -import pathlib +from setuptools.command.install import install from _version import __version__ from setuptools import find_packages, setup +import pathlib +import os with open("requirements.txt", "r") as file: required_deps = file.read().splitlines() @@ -8,8 +10,20 @@ parent_folder = pathlib.Path(__file__).parent.resolve() long_description = (parent_folder / "README.md").read_text(encoding="utf-8") + +class custom_install(install): + def run(self): + # Default install command + install.run(self) + + os.system("python3 generate_config.py") + + setup( name="mars", + cmdclass={ + "install": custom_install, + }, description="Multi-omics Adapter for Repository Submissions", 
long_description=long_description, long_description_content_type="text/markdown", From 161017f214a9f508c95ec4473de72034eab66207 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 10:54:10 +0100 Subject: [PATCH 03/45] Move external references logic to separate file --- mars-cli/mars_lib/submit.py | 43 +++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 mars-cli/mars_lib/submit.py diff --git a/mars-cli/mars_lib/submit.py b/mars-cli/mars_lib/submit.py new file mode 100644 index 0000000..a47c634 --- /dev/null +++ b/mars-cli/mars_lib/submit.py @@ -0,0 +1,43 @@ +from mars_lib.authentication import get_webin_auth_token +from mars_lib.biosamples_external_references import ( + get_header, + biosamples_endpoints, + BiosamplesRecord, + validate_json_against_schema, + input_json_schema_filepath, +) + + +def create_external_references( + biosamples_credentials, biosamples_externalReferences, production +): + """ + Main function to be executed when script is run. + + Args: + biosamples_credentials: Dictionary with the credentials of the submitter of the existing Biosamples records. + biosamples_externalReferences: Dictionary containing the mapping between the + production: Boolean indicating the environment of BioSamples to use. 
+ """ + if production: + logger_name = "production" + biosamples_endpoint = biosamples_endpoints["prod"] + else: + logger_name = "development" + biosamples_endpoint = biosamples_endpoints["dev"] + + validate_json_against_schema( + json_doc=biosamples_externalReferences, json_schema=input_json_schema_filepath + ) + token = get_webin_auth_token(biosamples_credentials) + header = get_header(token) + + for biosample_r in biosamples_externalReferences["biosampleExternalReferences"]: + bs_accession = biosample_r["biosampleAccession"] + BSrecord = BiosamplesRecord(bs_accession) + BSrecord.fetch_bs_json(biosamples_endpoint) + # To test it without the fetching, you can download it manually and then use: + # BSrecord.load_bs_json(bs_json_file="downloaded-json.json") + new_ext_refs_list = biosample_r["externalReferences"] + BSrecord.extend_externalReferences(new_ext_refs_list) + BSrecord.update_remote_record(header) From 193e506461799ace979af7deb9124a0141c45257 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 11:33:59 +0100 Subject: [PATCH 04/45] change entry point to `cli` --- mars-cli/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mars-cli/setup.py b/mars-cli/setup.py index 81a9011..a8069be 100644 --- a/mars-cli/setup.py +++ b/mars-cli/setup.py @@ -47,7 +47,7 @@ def run(self): }, entry_points={ # Optional "console_scripts": [ - "mars-cli=mars_cli:main", + "mars-cli=mars_cli:cli", ], }, python_requires=">=3.9, <4", From 8fd8525cd3448942256150af223ea5951111f276 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 13:03:09 +0100 Subject: [PATCH 05/45] Don't overwrite config file by default when reinstalling --- mars-cli/generate_config.py | 44 ++++++++++++++++++++++++++++--------- mars-cli/setup.py | 4 +++- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/mars-cli/generate_config.py b/mars-cli/generate_config.py index 39dba0f..68d0fe6 100644 --- a/mars-cli/generate_config.py +++ 
b/mars-cli/generate_config.py @@ -1,16 +1,40 @@ import configparser import pathlib -# Create settings file in user's home directory -settings_dir = pathlib.Path.home() / ".mars" -if not settings_dir.exists(): - settings_dir.mkdir() -settings_path = settings_dir / "settings.ini" -log_path = settings_dir / "app.log" +def create_settings_file(settings_dir): + """ + Create a settings file with the specified log path and settings path. -config = configparser.ConfigParser() -config["logging"] = {"log_level": "ERROR", "log_file": log_path} + Args: + settings_path (str): The path to the settings file. -with open(settings_path, "w") as config_file: - config.write(config_file) + Returns: + None + """ + log_path = settings_dir / "app.log" + settings_path = settings_dir / "settings.ini" + config = configparser.ConfigParser() + config["logging"] = {"log_level": "ERROR", "log_file": log_path} + + with open(settings_path, "w") as config_file: + config.write(config_file) + + +def generate_config(overwrite): + """ + Generate the configuration file for the MARS CLI. 
+ + Returns: + None + """ + settings_dir = pathlib.Path.home() / ".mars" + if not settings_dir.exists(): + settings_dir.mkdir() + + settings_path = settings_dir / "settings.ini" + + if settings_path.exists() and not overwrite: + return + + create_settings_file(settings_dir) diff --git a/mars-cli/setup.py b/mars-cli/setup.py index a8069be..4fae8dd 100644 --- a/mars-cli/setup.py +++ b/mars-cli/setup.py @@ -3,6 +3,7 @@ from setuptools import find_packages, setup import pathlib import os +from generate_config import generate_config with open("requirements.txt", "r") as file: required_deps = file.read().splitlines() @@ -16,7 +17,8 @@ def run(self): # Default install command install.run(self) - os.system("python3 generate_config.py") + overwrite_settings = os.getenv("OVERWRITE_SETTINGS", "False").lower() == "true" + generate_config(overwrite_settings) setup( From 0c536e38dac01c5762680bc1ac8949f866a83785 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 13:03:41 +0100 Subject: [PATCH 06/45] Add logging --- mars-cli/mars_cli.py | 77 ++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 50 deletions(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index c4c1af4..a8c5fcd 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -1,56 +1,31 @@ -from mars_lib.authentication import get_webin_auth_token -from mars_lib.biosamples_external_references import ( - get_header, - biosamples_endpoints, - BiosamplesRecord, - validate_json_against_schema, - handle_input_dict, - input_json_schema_filepath, -) +from math import log import click import logging +import pathlib +from configparser import ConfigParser from mars_lib.isa_json import TargetRepository +# Load CLI configuration +home_dir = pathlib.Path.home() +config_file = home_dir / ".mars" / "settings.ini" +fallback_log_file = home_dir / ".mars" / "app.log" + +config = ConfigParser() +config.read(config_file) + +# Logging configuration +log_level = 
config.get("logging", "log_level", fallback="ERROR") +log_file = config.get("logging", "log_file", fallback=fallback_log_file) +logging.basicConfig( + filename=log_file, + level=log_level, + format="%(asctime)s - %(levelname)s - %(message)s", +) -def create_external_references( - biosamples_credentials, biosamples_externalReferences, production -): - """ - Main function to be executed when script is run. - - Args: - biosamples_credentials: Dictionary with the credentials of the submitter of the existing Biosamples records. - biosamples_externalReferences: Dictionary containing the mapping between the - production: Boolean indicating the environment of BioSamples to use. - """ - if production: - logger_name = "production" - biosamples_endpoint = biosamples_endpoints["prod"] - else: - logger_name = "development" - biosamples_endpoint = biosamples_endpoints["dev"] - - logging.basicConfig( - filename=logger_name + ".log", - filemode="w", - format="%(name)s - %(levelname)s - %(message)s", - ) - - validate_json_against_schema( - json_doc=biosamples_externalReferences, json_schema=input_json_schema_filepath - ) - token = get_webin_auth_token(biosamples_credentials) - header = get_header(token) - for biosample_r in biosamples_externalReferences["biosampleExternalReferences"]: - bs_accession = biosample_r["biosampleAccession"] - BSrecord = BiosamplesRecord(bs_accession) - BSrecord.fetch_bs_json(biosamples_endpoint) - # To test it without the fetching, you can download it manually and then use: - # BSrecord.load_bs_json(bs_json_file="downloaded-json.json") - new_ext_refs_list = biosample_r["externalReferences"] - BSrecord.extend_externalReferences(new_ext_refs_list) - BSrecord.update_remote_record(header) +def print_and_log(msg): + click.echo(msg) + logging.info(msg) @click.group() @@ -60,7 +35,7 @@ def create_external_references( help="Boolean indicating the usage of the development environment of the target repositories. 
If not present, the production instances will be used.", ) def cli(development): - click.echo( + print_and_log( f"Running in {'Development environment' if development else 'Production environment'}" ) @@ -82,6 +57,7 @@ def cli(development): help="Submit to Metabolights.", ) def submit(credentials_file, isa_json_file, submit_to_ena, submit_to_metabolights): + """Start a submission to the target repositories.""" target_repositories = ["biosamples"] if submit_to_ena: target_repositories.append(TargetRepository.ENA) @@ -89,14 +65,15 @@ def submit(credentials_file, isa_json_file, submit_to_ena, submit_to_metabolight if submit_to_metabolights: target_repositories.append(TargetRepository.METABOLIGHTS) - click.echo( + print_and_log( f"Staring submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}." ) @cli.command() def health_check(): - click.echo("Checking the health of the target repositories.") + """Check the health of the target repositories.""" + print_and_log("Checking the health of the target repositories.") if __name__ == "__main__": From 9464bc26de4f6ed04b6e1bc545dee49579cad671 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 13:28:16 +0100 Subject: [PATCH 07/45] Add constraints on the size of the files and the number of older log files. 
--- mars-cli/generate_config.py | 7 ++++++- mars-cli/mars_cli.py | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/mars-cli/generate_config.py b/mars-cli/generate_config.py index 68d0fe6..7bff30b 100644 --- a/mars-cli/generate_config.py +++ b/mars-cli/generate_config.py @@ -15,7 +15,12 @@ def create_settings_file(settings_dir): log_path = settings_dir / "app.log" settings_path = settings_dir / "settings.ini" config = configparser.ConfigParser() - config["logging"] = {"log_level": "ERROR", "log_file": log_path} + config["logging"] = { + "log_level": "ERROR", + "log_file": log_path, + "log_max_size": "1024", + "log_max_files": "5", + } with open(settings_path, "w") as config_file: config.write(config_file) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index a8c5fcd..69537a5 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -4,6 +4,7 @@ import pathlib from configparser import ConfigParser from mars_lib.isa_json import TargetRepository +from logging.handlers import RotatingFileHandler # Load CLI configuration home_dir = pathlib.Path.home() @@ -16,10 +17,21 @@ # Logging configuration log_level = config.get("logging", "log_level", fallback="ERROR") log_file = config.get("logging", "log_file", fallback=fallback_log_file) +log_max_size = int( + config.get("logging", "log_max_size", fallback="1024") +) # in kilobytes. 1 MB by default. +log_max_files = int( + config.get("logging", "log_max_files", fallback="5") +) # number of backup files. 5 by default. 
+ +handler = RotatingFileHandler( + log_file, maxBytes=log_max_size * 1024, backupCount=log_max_files +) +handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")) + logging.basicConfig( - filename=log_file, + handlers=[handler], level=log_level, - format="%(asctime)s - %(levelname)s - %(message)s", ) From 5513c67a6793e7dc879487deccdc978255a7e57f Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 13:40:46 +0100 Subject: [PATCH 08/45] Add logging documentation --- mars-cli/README.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/mars-cli/README.md b/mars-cli/README.md index 6a816c0..8163409 100644 --- a/mars-cli/README.md +++ b/mars-cli/README.md @@ -13,6 +13,45 @@ If you want to install the optional testing dependencies as well, useful when co pip install .[test] ``` +If you want to overwrite the `settings.ini` file when reinstalling, you need to set the environmental variable `OVERWRITE_SETTINGS` to `True`: + +```sh +OVERWRITE_SETTINGS=True pip install .[test] +``` + +Once installed, the CLI application will be available from the terminal. + +# Configuration + +Installing this application will also generate a `settings.ini` file in `$HOME/.mars/`. + +``` +[logging] +log_level = ERROR +log_file = /my/logging/directory/.mars/app.log +log_max_size = 1024 +log_max_files = 5 +``` + +## Logging + +The MARS-CLI will automatically log events to a `.log` file. + +__log_level__: The verbosity of logging can be set to three different levels +- CRITICAL: Only critical messages will be logged. __Not recommended!__ +- ERROR: Errors and critical messages will be logged. +- WARNING: Warnings, errors and critical messages will be logged. +- INFO: All events are logged. +- DEBUG: For debugging purpose only. __Not recommended as it might log more sensitive information!__ +The default setting is ERROR. So only errors are logged! + +__log_file__: The path to the log file. 
By default this will be in `$HOME/.mars/app.log`. + +__log_max_size__: The maximum size in kB for the log file. By default the maximum size is set to 1024 kB or 1 MB. + +__log_max_files__: The maximum number of old log files to keep. By default, this is set to 5 + + # Extending BioSamples' records The Python script ``biosamples-externalReferences.py`` defines a class BiosamplesRecord for managing biosample records. This class is designed to interact with the BioSamples database, allowing operations like fetching, updating, and extending biosample records. The script takes in a dictionary of BioSamples' accessions and their associated external references, and expands the former with the latter. From 4d9cfd5ae9f1cb3e8c191a66f3b0009d0f6a642b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 29 Mar 2024 21:07:24 +0100 Subject: [PATCH 09/45] Add ISA JSON models for validation --- mars-cli/mars_lib/schemas/model.py | 264 +++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 mars-cli/mars_lib/schemas/model.py diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py new file mode 100644 index 0000000..7cb169a --- /dev/null +++ b/mars-cli/mars_lib/schemas/model.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +from enum import Enum +import re +from typing import List, Optional, Union + +from pydantic import BaseModel, Field, validator + + +class Comment(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + name: Optional[str] = Field(default=None) + value: Optional[str] = Field(default=None) + + +class OntologySourceReference(BaseModel): + comments: Optional[List[Comment]] = Field(default=[]) + description: Optional[str] = Field(default=None) + file: Optional[str] = Field(default=None) + name: Optional[str] = Field(default=None) + version: Optional[str] = Field(default=None) + + +# TODO: Question: Should these be case-sensitive? 
+class DataTypeEnum(str, Enum): + RAW_DATA_FILE = "Raw Data File" + DERIVED_DATA_FILE = "Derived Data File" + IMAGE_FILE = "Image File" + SPECTRAL_RAW_DATA_FILE = "Spectral Raw Data File" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/) + FREE_INDUCTION_DECAY_FILE = "Free Induction Decay File" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/) + + +class Data(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + comments: Optional[List[Comment]] = Field(default=[]) + name: Optional[str] = Field(default=None) + type: Optional[DataTypeEnum] = Field(default=None) + + @validator("type") + def apply_enum(cls, v): + if v not in [item.value for item in DataTypeEnum]: + raise ValueError("Invalid material type") + return v + + +class OntologyAnnotation(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + annotationValue: Union[Optional[str], Optional[float], Optional[int]] = Field( + default=[] + ) + comments: Optional[List[Comment]] = Field(default=[]) + termAccession: Optional[str] = Field(default=None) + termSource: Optional[str] = Field( + description="The abbreviated ontology name. 
It should correspond to one of the sources as specified in the ontologySourceReference section of the Investigation.", + default=None, + ) + + +class MaterialAttributeValue(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + characteristicType: Optional[OntologyAnnotation] = Field(default=None) + + +class Factor(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + comments: Optional[List[Comment]] = Field(default=[]) + factorName: Optional[str] = Field(default=None) + factorType: Optional[OntologyAnnotation] = Field(default=None) + + +class FactorValue(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + category: Optional[Factor] = Field(default=None) + value: Union[ + Optional[str], Optional[float], Optional[int], Optional[OntologyAnnotation] + ] = Field(default=[]) + unit: Optional[OntologyAnnotation] = Field(default=None) + + +class Source(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + characteristics: Optional[List[MaterialAttributeValue]] = Field(default=[]) + name: Optional[str] = Field(default=None) + + +class Sample(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + name: Optional[str] = Field(default=None) + characteristics: Optional[List[MaterialAttributeValue]] = Field(default=[]) + factorValues: Optional[List[FactorValue]] = Field(default=[]) + derivesFrom: Optional[List[Source]] = Field(default=[]) + + +class ProtocolParameter(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + parameterName: Optional[OntologyAnnotation] = Field(default=None) + + +class ProcessParameterValue(BaseModel): + category: Optional[ProtocolParameter] = Field(default=None) + value: Union[ + Optional[str], Optional[float], Optional[int], Optional[OntologyAnnotation] + ] = Field(default=[]) + unit: Optional[OntologyAnnotation] = Field(default=None) + + +# Helper class for protocol -> components +class Component(BaseModel): + componentName: Optional[str] = Field(default=None) 
+ componentType: Optional[OntologyAnnotation] = Field(default=None) + + +class Protocol(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + comments: Optional[List[Comment]] = Field(default=[]) + components: Optional[List[Component]] = Field(default=[]) + description: Optional[str] = Field(default=None) + name: Optional[str] = Field(default=None) + parameters: Optional[List[ProtocolParameter]] = Field(default=[]) + protocolType: Optional[OntologyAnnotation] = Field(default=None) + uri: Optional[str] = Field(default=None) + version: Optional[str] = Field(default=None) + + +# Enum for material -> type +# TODO: Question: Should these be case-sensitive? +class MaterialTypeEnum(str, Enum): + EXTRACT_NAME = "Extract Name" + LABELED_EXTRACT_NAME = "Labeled Extract Name" + + +class Material(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + characteristics: List[MaterialAttributeValue] = Field(default=[]) + comments: Optional[List[Comment]] = Field(default=[]) + name: Optional[str] = Field(default=None) + type: Optional[str] = Field(default=None) + derivesFrom: Optional[List[Material]] = Field(default=[]) + + @validator("type") + def apply_enum(cls, v): + if v not in [item.value for item in MaterialTypeEnum]: + raise ValueError("Invalid material type") + return v + + +class Process(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + comments: Optional[List[Comment]] = Field(default=[]) + date: Optional[str] = Field(default=None) + executesProtocol: Optional[Protocol] = Field(default=None) + inputs: Optional[Union[List[Source], List[Sample], List[Material], list[Data]]] = ( + Field(default=[]) + ) + name: Optional[str] = Field(default=None) + nextProcess: Optional[Process] = Field(default=None) + outputs: Optional[Union[List[Sample], List[Material], list[Data]]] = Field( + default=[] + ) + parameterValues: Optional[List[ProcessParameterValue]] = Field(default=[]) + performer: Optional[str] = Field(default=None) + 
previousProcess: Optional[Process] = Field(default=None) + + +class TechnologyType(BaseModel): + ontologyAnnotation: Optional[OntologyAnnotation] = Field(default=None) + + +# Helper for assay -> materials +class AssayMaterialType(BaseModel): + samples: Optional[List[Sample]] = Field(default=[]) + otherMaterials: Optional[List[Material]] = Field(default=[]) + + +class Assay(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + characteristicCategories: Optional[List[MaterialAttribute]] = Field(default=[]) + comments: Optional[List[Comment]] = Field(default=[]) + dataFiles: Optional[List[Data]] = Field(default=[]) + filename: Optional[str] = Field(default=None) + materials: Optional[AssayMaterialType] = Field(default=None) + measurementType: Optional[OntologyAnnotation] + processSequence: Optional[List[Process]] = Field(default=[]) + technologyPlatform: Optional[str] = Field(default=None) + technologyType: Optional[TechnologyType] = Field(default=None) + unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) + + +class Person(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + address: Optional[str] = Field(default=None) + affiliation: Optional[str] = Field(default=None) + comments: Optional[List[Comment]] = Field(default=[]) + email: Optional[str] = Field(default=None) + fax: Optional[str] = Field(default=None) + firstName: Optional[str] = Field(default=None) + lastName: Optional[str] = Field(default=None) + midInitials: Optional[str] = Field(default=None) + phone: Optional[str] = Field(default=None) + roles: Optional[List[OntologyAnnotation]] = Field(default=[]) + + @validator("phone", "fax") + def check_numbers(cls, v): + if not (re.match(r"^\+\d{1,3}\d{4,}$", v) or v == ""): + raise ValueError("Invalid number format") + return v + + +class Publication(BaseModel): + authorList: Optional[str] = Field(default=None) + comments: Optional[List[Comment]] = Field(default=[]) + doi: Optional[str] = Field(default=None) + 
pubMedID: Optional[str] = Field(default=None) + status: Optional[OntologyAnnotation] = Field(default=None) + title: Optional[str] = Field(default=None) + + +class StudyMaterialType(BaseModel): + sources: Optional[List[Source]] = Field(default=[]) + samples: Optional[List[Sample]] = Field(default=[]) + otherMaterials: Optional[List[Material]] = Field(default=[]) + + +class MaterialAttribute(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + characteristicType: Optional[OntologyAnnotation] = Field(default=None) + + +class Study(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + assays: Optional[List[Assay]] = Field(default=[]) + characteristicCategories: Optional[List[MaterialAttribute]] = Field(default=[]) + comments: Optional[List[Comment]] = Field(default=[]) + description: Optional[str] = Field(default=None) + factors: Optional[List[Factor]] = Field(default=[]) + filename: Optional[str] = Field(default=None) + identifier: Optional[str] = Field(default=None) + materials: Optional[StudyMaterialType] + people: Optional[List[Person]] = Field(default=[]) + processSequence: Optional[List[Process]] = Field(default=[]) + protocols: Optional[List[Protocol]] = Field(default=[]) + publicReleaseDate: Optional[str] = Field(default=None) + publications: Optional[List[Publication]] = Field(default=[]) + studyDesignDescriptors: Optional[List[OntologyAnnotation]] = Field(default=[]) + submissionDate: Optional[str] = Field(default=None) + title: Optional[str] = Field(default=None) + unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) + + +class Investigation(BaseModel): + id: Optional[str] = Field(alias="@id", default=None) + comments: Optional[List[Comment]] = Field(default=[]) + description: Optional[str] = Field(default=None) + filename: Optional[str] = Field(default=None) + identifier: Optional[str] = Field(default=None) + ontologySourceReferences: Optional[List[OntologySourceReference]] = Field( + default=[] + ) + people: 
Optional[List[Person]] = Field(default=[]) + publicReleaseDate: Optional[str] = Field(default=None) + publications: Optional[List[Publication]] = Field(default=[]) + studies: Optional[List[Study]] = Field(default=[]) + submissionDate: Optional[str] = Field(default=None) + title: Optional[str] = Field(default=None) From b4b4b5318f665b0b0f45a228a0aceab8dedcba7b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Sun, 31 Mar 2024 09:47:03 +0200 Subject: [PATCH 10/45] Add validator for assay comments --- mars-cli/mars_lib/schemas/model.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py index 7cb169a..139cdfe 100644 --- a/mars-cli/mars_lib/schemas/model.py +++ b/mars-cli/mars_lib/schemas/model.py @@ -185,6 +185,15 @@ class Assay(BaseModel): technologyType: Optional[TechnologyType] = Field(default=None) unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) + @validator("comments") + def detect_target_repo_comments(cls, v): + target_repo_comments = [comment.name for comment in v] + if len(target_repo_comments) == 0: + raise ValueError("'target repository' comment is missing") + if len(target_repo_comments) > 1: + raise ValueError("Multiple 'target repository' comments found") + return v + class Person(BaseModel): id: Optional[str] = Field(alias="@id", default=None) From e7b85700942ef37d45625bf4d899e87be0114268 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Sun, 31 Mar 2024 11:09:53 +0200 Subject: [PATCH 11/45] Exclude all `__init__.py` files from coverage --- mars-cli/.coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mars-cli/.coveragerc b/mars-cli/.coveragerc index 2c3ba72..a0ce032 100644 --- a/mars-cli/.coveragerc +++ b/mars-cli/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = mars_lib/__init__.py, mars_lib/submit.py, mars_lib/credential.py +omit = mars_lib/**__init__.py, mars_lib/submit.py, mars_lib/credential.py From 
9e2a90b285b430c1594c86169053f3f3bf64b46b Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Sun, 31 Mar 2024 11:10:04 +0200 Subject: [PATCH 12/45] Add log files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f630fa5..d2db749 100644 --- a/.gitignore +++ b/.gitignore @@ -57,7 +57,7 @@ cover/ *.pot # Django stuff: -*.log +**.log local_settings.py db.sqlite3 db.sqlite3-journal From 6002c8cf28ed05fa66e92cb30f57b7d67334f592 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 15:43:27 +0200 Subject: [PATCH 13/45] Make model validation more strict --- mars-cli/mars_lib/schemas/model.py | 75 ++++++++++++++++++------------ 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py index 139cdfe..3fcc7d1 100644 --- a/mars-cli/mars_lib/schemas/model.py +++ b/mars-cli/mars_lib/schemas/model.py @@ -4,16 +4,22 @@ import re from typing import List, Optional, Union -from pydantic import BaseModel, Field, validator +from click import Option +from pydantic import BaseModel, Field, validator, ConfigDict -class Comment(BaseModel): +class IsaBase(BaseModel): + # model_config = ConfigDict(extra="allow") + model_config = ConfigDict(extra="forbid") + + +class Comment(IsaBase): id: Optional[str] = Field(alias="@id", default=None) name: Optional[str] = Field(default=None) value: Optional[str] = Field(default=None) -class OntologySourceReference(BaseModel): +class OntologySourceReference(IsaBase): comments: Optional[List[Comment]] = Field(default=[]) description: Optional[str] = Field(default=None) file: Optional[str] = Field(default=None) @@ -30,7 +36,7 @@ class DataTypeEnum(str, Enum): FREE_INDUCTION_DECAY_FILE = "Free Induction Decay File" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/) -class Data(BaseModel): +class Data(IsaBase): id: Optional[str] = Field(alias="@id", default=None) 
comments: Optional[List[Comment]] = Field(default=[]) name: Optional[str] = Field(default=None) @@ -43,7 +49,7 @@ def apply_enum(cls, v): return v -class OntologyAnnotation(BaseModel): +class OntologyAnnotation(IsaBase): id: Optional[str] = Field(alias="@id", default=None) annotationValue: Union[Optional[str], Optional[float], Optional[int]] = Field( default=[] @@ -56,19 +62,26 @@ class OntologyAnnotation(BaseModel): ) -class MaterialAttributeValue(BaseModel): +class MaterialAttributeValue(IsaBase): id: Optional[str] = Field(alias="@id", default=None) - characteristicType: Optional[OntologyAnnotation] = Field(default=None) + category: Optional[OntologyAnnotation] = Field(default=None) + value: Union[ + Optional[OntologyAnnotation], Optional[str], Optional[float], Optional[int] + ] = Field(default=None) + unit: Optional[OntologyAnnotation] = Field(default=None) + comments: Optional[List[Comment]] = Field( + default=[] + ) # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-attribute-value-schema-json) -class Factor(BaseModel): +class Factor(IsaBase): id: Optional[str] = Field(alias="@id", default=None) comments: Optional[List[Comment]] = Field(default=[]) factorName: Optional[str] = Field(default=None) factorType: Optional[OntologyAnnotation] = Field(default=None) -class FactorValue(BaseModel): +class FactorValue(IsaBase): id: Optional[str] = Field(alias="@id", default=None) category: Optional[Factor] = Field(default=None) value: Union[ @@ -77,26 +90,32 @@ class FactorValue(BaseModel): unit: Optional[OntologyAnnotation] = Field(default=None) -class Source(BaseModel): +class Source(IsaBase): id: Optional[str] = Field(alias="@id", default=None) characteristics: Optional[List[MaterialAttributeValue]] = Field(default=[]) name: Optional[str] = Field(default=None) + comments: Optional[List[Comment]] = Field( + default=[] + ) # TODO: QUESTION: This is not mentioned in the specs 
(https://isa-specs.readthedocs.io/en/latest/isajson.html#source-schema-json) -class Sample(BaseModel): +class Sample(IsaBase): id: Optional[str] = Field(alias="@id", default=None) name: Optional[str] = Field(default=None) characteristics: Optional[List[MaterialAttributeValue]] = Field(default=[]) factorValues: Optional[List[FactorValue]] = Field(default=[]) derivesFrom: Optional[List[Source]] = Field(default=[]) + comments: Optional[List[Comment]] = Field( + default=[] + ) # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#sample-schema-json) -class ProtocolParameter(BaseModel): +class ProtocolParameter(IsaBase): id: Optional[str] = Field(alias="@id", default=None) parameterName: Optional[OntologyAnnotation] = Field(default=None) -class ProcessParameterValue(BaseModel): +class ProcessParameterValue(IsaBase): category: Optional[ProtocolParameter] = Field(default=None) value: Union[ Optional[str], Optional[float], Optional[int], Optional[OntologyAnnotation] @@ -105,12 +124,12 @@ class ProcessParameterValue(BaseModel): # Helper class for protocol -> components -class Component(BaseModel): +class Component(IsaBase): componentName: Optional[str] = Field(default=None) componentType: Optional[OntologyAnnotation] = Field(default=None) -class Protocol(BaseModel): +class Protocol(IsaBase): id: Optional[str] = Field(alias="@id", default=None) comments: Optional[List[Comment]] = Field(default=[]) components: Optional[List[Component]] = Field(default=[]) @@ -129,7 +148,7 @@ class MaterialTypeEnum(str, Enum): LABELED_EXTRACT_NAME = "Labeled Extract Name" -class Material(BaseModel): +class Material(IsaBase): id: Optional[str] = Field(alias="@id", default=None) characteristics: List[MaterialAttributeValue] = Field(default=[]) comments: Optional[List[Comment]] = Field(default=[]) @@ -144,7 +163,7 @@ def apply_enum(cls, v): return v -class Process(BaseModel): +class Process(IsaBase): id: Optional[str] = Field(alias="@id", 
default=None) comments: Optional[List[Comment]] = Field(default=[]) date: Optional[str] = Field(default=None) @@ -162,17 +181,13 @@ class Process(BaseModel): previousProcess: Optional[Process] = Field(default=None) -class TechnologyType(BaseModel): - ontologyAnnotation: Optional[OntologyAnnotation] = Field(default=None) - - # Helper for assay -> materials -class AssayMaterialType(BaseModel): +class AssayMaterialType(IsaBase): samples: Optional[List[Sample]] = Field(default=[]) otherMaterials: Optional[List[Material]] = Field(default=[]) -class Assay(BaseModel): +class Assay(IsaBase): id: Optional[str] = Field(alias="@id", default=None) characteristicCategories: Optional[List[MaterialAttribute]] = Field(default=[]) comments: Optional[List[Comment]] = Field(default=[]) @@ -182,7 +197,7 @@ class Assay(BaseModel): measurementType: Optional[OntologyAnnotation] processSequence: Optional[List[Process]] = Field(default=[]) technologyPlatform: Optional[str] = Field(default=None) - technologyType: Optional[TechnologyType] = Field(default=None) + technologyType: Optional[OntologyAnnotation] = Field(default=None) unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) @validator("comments") @@ -195,7 +210,7 @@ def detect_target_repo_comments(cls, v): return v -class Person(BaseModel): +class Person(IsaBase): id: Optional[str] = Field(alias="@id", default=None) address: Optional[str] = Field(default=None) affiliation: Optional[str] = Field(default=None) @@ -215,7 +230,7 @@ def check_numbers(cls, v): return v -class Publication(BaseModel): +class Publication(IsaBase): authorList: Optional[str] = Field(default=None) comments: Optional[List[Comment]] = Field(default=[]) doi: Optional[str] = Field(default=None) @@ -224,18 +239,18 @@ class Publication(BaseModel): title: Optional[str] = Field(default=None) -class StudyMaterialType(BaseModel): +class StudyMaterialType(IsaBase): sources: Optional[List[Source]] = Field(default=[]) samples: Optional[List[Sample]] = 
Field(default=[]) otherMaterials: Optional[List[Material]] = Field(default=[]) -class MaterialAttribute(BaseModel): +class MaterialAttribute(IsaBase): id: Optional[str] = Field(alias="@id", default=None) characteristicType: Optional[OntologyAnnotation] = Field(default=None) -class Study(BaseModel): +class Study(IsaBase): id: Optional[str] = Field(alias="@id", default=None) assays: Optional[List[Assay]] = Field(default=[]) characteristicCategories: Optional[List[MaterialAttribute]] = Field(default=[]) @@ -256,7 +271,7 @@ class Study(BaseModel): unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) -class Investigation(BaseModel): +class Investigation(IsaBase): id: Optional[str] = Field(alias="@id", default=None) comments: Optional[List[Comment]] = Field(default=[]) description: Optional[str] = Field(default=None) From 8f5a752a0c2cf2380509513fed8d673dcb485585 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 16:11:08 +0200 Subject: [PATCH 14/45] Remove unused variables --- mars-cli/mars_lib/submit.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mars-cli/mars_lib/submit.py b/mars-cli/mars_lib/submit.py index a47c634..7c095be 100644 --- a/mars-cli/mars_lib/submit.py +++ b/mars-cli/mars_lib/submit.py @@ -20,10 +20,8 @@ def create_external_references( production: Boolean indicating the environment of BioSamples to use. 
""" if production: - logger_name = "production" biosamples_endpoint = biosamples_endpoints["prod"] else: - logger_name = "development" biosamples_endpoint = biosamples_endpoints["dev"] validate_json_against_schema( From 92fd5876989dba61555904b04ec060694a1a164f Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 16:11:51 +0200 Subject: [PATCH 15/45] linting --- mars-cli/mars_lib/schemas/__init__.py | 0 mars-cli/mars_lib/schemas/model.py | 1 - 2 files changed, 1 deletion(-) create mode 100644 mars-cli/mars_lib/schemas/__init__.py diff --git a/mars-cli/mars_lib/schemas/__init__.py b/mars-cli/mars_lib/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py index 3fcc7d1..5ca9d26 100644 --- a/mars-cli/mars_lib/schemas/model.py +++ b/mars-cli/mars_lib/schemas/model.py @@ -4,7 +4,6 @@ import re from typing import List, Optional, Union -from click import Option from pydantic import BaseModel, Field, validator, ConfigDict From 908bb4eeea494ac33c277aacd4489d94331c16d9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 16:19:05 +0200 Subject: [PATCH 16/45] Use pydantic classes instead of dictionaries --- mars-cli/mars_lib/isa_json.py | 54 ++++++++++++++------------------- mars-cli/tests/test_isa_json.py | 17 ++++++++--- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index 04ac51e..1b01047 100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -1,21 +1,11 @@ import json from typing import Dict, Union, List -import copy +from mars_lib.schemas.model import Investigation, Assay, Comment +from pydantic import ValidationError TARGET_REPO_KEY = "target repository" -class IsaJsonValidationError(ValueError): - """ - Custom Error object to be used when the validation fails. - This class extends the ValueError class. 
- """ - - def __init__(self, report, message="The Provided ISA JSON is invalid!"): - self.message = message + "\n" + str(report["errors"]) - super().__init__(self.message) - - class TargetRepository: """ Holds constants, tied to the target repositories. @@ -27,49 +17,49 @@ class TargetRepository: def reduce_isa_json_for_target_repo( - input_isa_json: Dict, target_repo: str -) -> Dict[str, str]: + input_isa_json: Investigation, target_repo: str +) -> Investigation: """ Filters out assays that are not meant to be sent to the specified target repository. Args: - input_isa_json (Dict[str, str]): Input ISA JSON that contains the original information. + input_isa_json (Investigation): Input ISA JSON that contains the original information. target_repo (TargetRepository): Target repository as a constant. Returns: - Dict[str, str]: Filtered ISA JSON. + Investigation: Filtered ISA JSON. """ - filtered_isa_json = copy.deepcopy(input_isa_json) + filtered_isa_json = input_isa_json.model_copy(deep=True) new_studies = [] - studies = filtered_isa_json.pop("studies") + studies = filtered_isa_json.studies for study in studies: - assays = study.pop("assays") + assays = study.assays filtered_assays = [ assay for assay in assays if is_assay_for_target_repo(assay, target_repo) ] if len(filtered_assays) > 0: - study["assays"] = filtered_assays + study.assays = filtered_assays new_studies.append(study) - filtered_isa_json["studies"] = new_studies + filtered_isa_json.studies = new_studies return filtered_isa_json -def detect_target_repo_comment(comments: List[Dict[str, str]]) -> Dict[str, str]: - """_summary_ +def detect_target_repo_comment(comments: List[Comment]) -> Comment: + """Will detect the comment that contains the target repository. Args: - comments (List[Dict[str, str]]): Dictionary of comments. + comments (List[Comment]): List of comments. Returns: - Dict[str, str]: The comment where the name corresponds with the name of the provided target repo. 
+ Comment: The comment where the name corresponds with the name of the provided target repo. """ for comment in comments: - if comment["name"] == TARGET_REPO_KEY: + if comment.name == TARGET_REPO_KEY: return comment -def is_assay_for_target_repo(assay_dict: Dict, target_repo: str) -> bool: +def is_assay_for_target_repo(assay: Assay, target_repo: str) -> bool: """ Defines whether the assays is meant for the target repository. @@ -80,14 +70,14 @@ def is_assay_for_target_repo(assay_dict: Dict, target_repo: str) -> bool: Returns: bool: Boolean defining whether the assay is destined for the provided target repo. """ - target_repo_comment = detect_target_repo_comment(assay_dict["comments"]) - if target_repo_comment["value"] == target_repo: + target_repo_comment = detect_target_repo_comment(assay.comments) + if target_repo_comment.value == target_repo: return True else: return False -def load_isa_json(file_path: str) -> Union[Dict[str, str], IsaJsonValidationError]: +def load_isa_json(file_path: str) -> Union[Dict[str, str], ValidationError]: """ Reads the file and validates it as a valid ISA JSON. @@ -95,11 +85,11 @@ def load_isa_json(file_path: str) -> Union[Dict[str, str], IsaJsonValidationErro file_path (str): Path to ISA JSON as string. Returns: - Union[Dict[str, str], IsaJsonValidationError]: Depending on the validation, returns a filtered ISA JSON or an Error. + Union[Dict[str, str], ValidationError]: Depending on the validation, returns a filtered ISA JSON or a pydantic validation error. 
""" with open(file_path, "r") as json_file: isa_json = json.load(json_file) # TODO: Once we have an idea on what / how to validate, it should be added here - return isa_json + return Investigation.model_validate(isa_json) diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index cee4c56..c42ffc8 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -3,11 +3,18 @@ load_isa_json, TargetRepository, ) +import pytest +from pydantic import ValidationError def test_load_isa_json(): # Should test the validation process - pass + valid_isa_json = load_isa_json("../test-data/ISA-BH2023-ALL/isa-bh2023-all.json") + assert len(valid_isa_json.studies) == 1 + assert valid_isa_json.studies[0].identifier == "BH2023" + + with pytest.raises(ValidationError): + load_isa_json("./tests/fixtures/invalid_investigation.json") def test_reduce_isa_json_for_target_repo(): @@ -17,9 +24,9 @@ def test_reduce_isa_json_for_target_repo(): good_isa_json, TargetRepository.ENA ) - good_isa_json_study = good_isa_json["studies"][0] + good_isa_json_study = good_isa_json.studies[0] - filtered_isa_json_study = filtered_isa_json["studies"][0] + filtered_isa_json_study = filtered_isa_json.studies[0] - assert len(good_isa_json_study["assays"]) == 5 - assert len(filtered_isa_json_study["assays"]) == 1 + assert len(good_isa_json_study.assays) == 5 + assert len(filtered_isa_json_study.assays) == 1 From 9eb06040dc20b08a9894540ee2a7b03b4e657fe0 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 16:21:11 +0200 Subject: [PATCH 17/45] Migrate from `@validator` to `@field_validator` --- mars-cli/mars_lib/schemas/model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py index 5ca9d26..4114fde 100644 --- a/mars-cli/mars_lib/schemas/model.py +++ b/mars-cli/mars_lib/schemas/model.py @@ -4,7 +4,7 @@ import re from typing import List, Optional, 
Union -from pydantic import BaseModel, Field, validator, ConfigDict +from pydantic import BaseModel, Field, field_validator, ConfigDict class IsaBase(BaseModel): @@ -41,7 +41,7 @@ class Data(IsaBase): name: Optional[str] = Field(default=None) type: Optional[DataTypeEnum] = Field(default=None) - @validator("type") + @field_validator("type") def apply_enum(cls, v): if v not in [item.value for item in DataTypeEnum]: raise ValueError("Invalid material type") @@ -155,7 +155,7 @@ class Material(IsaBase): type: Optional[str] = Field(default=None) derivesFrom: Optional[List[Material]] = Field(default=[]) - @validator("type") + @field_validator("type") def apply_enum(cls, v): if v not in [item.value for item in MaterialTypeEnum]: raise ValueError("Invalid material type") @@ -199,7 +199,7 @@ class Assay(IsaBase): technologyType: Optional[OntologyAnnotation] = Field(default=None) unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) - @validator("comments") + @field_validator("comments") def detect_target_repo_comments(cls, v): target_repo_comments = [comment.name for comment in v] if len(target_repo_comments) == 0: @@ -222,7 +222,7 @@ class Person(IsaBase): phone: Optional[str] = Field(default=None) roles: Optional[List[OntologyAnnotation]] = Field(default=[]) - @validator("phone", "fax") + @field_validator("phone", "fax") def check_numbers(cls, v): if not (re.match(r"^\+\d{1,3}\d{4,}$", v) or v == ""): raise ValueError("Invalid number format") From 35de6055c5f37162b24be5d847924dbe090d0e42 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 17:19:19 +0200 Subject: [PATCH 18/45] Move TargetRepository and TARGET_REPO_KEY to shared file --- mars-cli/mars_lib/isa_json.py | 13 +------------ mars-cli/mars_lib/target_repo.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) create mode 100644 mars-cli/mars_lib/target_repo.py diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index 1b01047..8388abe 
100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -2,18 +2,7 @@ from typing import Dict, Union, List from mars_lib.schemas.model import Investigation, Assay, Comment from pydantic import ValidationError - -TARGET_REPO_KEY = "target repository" - - -class TargetRepository: - """ - Holds constants, tied to the target repositories. - """ - - ENA = "ena" - METABOLIGHTS = "metabolights" - BIOSAMPLES = "biosamples" +from mars_lib.target_repo import TARGET_REPO_KEY, TargetRepository def reduce_isa_json_for_target_repo( diff --git a/mars-cli/mars_lib/target_repo.py b/mars-cli/mars_lib/target_repo.py new file mode 100644 index 0000000..84a321c --- /dev/null +++ b/mars-cli/mars_lib/target_repo.py @@ -0,0 +1,14 @@ +from enum import Enum + + +TARGET_REPO_KEY = "target repository" + + +class TargetRepository(str, Enum): + """ + Holds constants, tied to the target repositories. + """ + + ENA = "ena" + METABOLIGHTS = "metabolights" + BIOSAMPLES = "biosamples" From a55b4bf582037e7ac3044f04c867ac4ccb0b88be Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 17:20:36 +0200 Subject: [PATCH 19/45] Add EVA to TargetRepository --- mars-cli/mars_lib/target_repo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mars-cli/mars_lib/target_repo.py b/mars-cli/mars_lib/target_repo.py index 84a321c..361f4ee 100644 --- a/mars-cli/mars_lib/target_repo.py +++ b/mars-cli/mars_lib/target_repo.py @@ -12,3 +12,4 @@ class TargetRepository(str, Enum): ENA = "ena" METABOLIGHTS = "metabolights" BIOSAMPLES = "biosamples" + EVA = "eva" From e49a45337e8e01e77f743e695f4ff1107c16ddbc Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 15 Apr 2024 17:21:23 +0200 Subject: [PATCH 20/45] Remove empty unused validation.py --- mars-cli/mars_lib/validation.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 mars-cli/mars_lib/validation.py diff --git a/mars-cli/mars_lib/validation.py b/mars-cli/mars_lib/validation.py deleted file 
mode 100644 index e69de29..0000000 From f37bacfcc7b453df4c1178321c4e75d79864d2d7 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 08:56:29 +0200 Subject: [PATCH 21/45] update target repo validator --- mars-cli/mars_lib/schemas/model.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py index 4114fde..713657b 100644 --- a/mars-cli/mars_lib/schemas/model.py +++ b/mars-cli/mars_lib/schemas/model.py @@ -5,6 +5,7 @@ from typing import List, Optional, Union from pydantic import BaseModel, Field, field_validator, ConfigDict +from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY class IsaBase(BaseModel): @@ -193,7 +194,7 @@ class Assay(IsaBase): dataFiles: Optional[List[Data]] = Field(default=[]) filename: Optional[str] = Field(default=None) materials: Optional[AssayMaterialType] = Field(default=None) - measurementType: Optional[OntologyAnnotation] + measurementType: Optional[OntologyAnnotation] = Field(default=None) processSequence: Optional[List[Process]] = Field(default=[]) technologyPlatform: Optional[str] = Field(default=None) technologyType: Optional[OntologyAnnotation] = Field(default=None) @@ -201,12 +202,22 @@ class Assay(IsaBase): @field_validator("comments") def detect_target_repo_comments(cls, v): - target_repo_comments = [comment.name for comment in v] + target_repo_comments = [ + comment for comment in v if comment.name == TARGET_REPO_KEY + ] if len(target_repo_comments) == 0: raise ValueError("'target repository' comment is missing") - if len(target_repo_comments) > 1: + elif len(target_repo_comments) > 1: raise ValueError("Multiple 'target repository' comments found") - return v + else: + if target_repo_comments[0].value in [ + item.value for item in TargetRepository + ]: + return v + else: + raise ValueError( + f"Invalid 'target repository' value: '{target_repo_comments[0].value}'" + ) class Person(IsaBase): From 
96826b99cd9b601a767d7e3225e3020cd4f8ea19 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 08:56:52 +0200 Subject: [PATCH 22/45] add validator tests --- mars-cli/tests/test_isa_json.py | 109 +++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index c42ffc8..a45a55e 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -1,10 +1,11 @@ from mars_lib.isa_json import ( reduce_isa_json_for_target_repo, load_isa_json, - TargetRepository, ) +from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY import pytest from pydantic import ValidationError +from mars_lib.schemas.model import Data, Material, Assay, Person def test_load_isa_json(): @@ -30,3 +31,109 @@ def test_reduce_isa_json_for_target_repo(): assert len(good_isa_json_study.assays) == 5 assert len(filtered_isa_json_study.assays) == 1 + + +def test_data_type_validator(): + valid_data_json = {"@id": "data_001", "name": "data 1", "type": "Image File"} + + invalid_data_json = { + "@id": "data_001", + "name": "data 1", + "type": "Custom File", # This is not a valid data type + } + + assert Data.model_validate(valid_data_json) + + with pytest.raises(ValidationError): + Data.model_validate(invalid_data_json) + + +def test_material_type_validator(): + valid_material_json = { + "@id": "material_001", + "name": "material 1", + "type": "Extract Name", + } + + invalid_material_json = { + "@id": "material_002", + "name": "material 2", + "type": "Custom Material", # This is not a valid material type + } + + assert Material.model_validate(valid_material_json) + + with pytest.raises(ValidationError): + Material.model_validate(invalid_material_json) + + +def test_target_repo_comment_validator(): + valid_assay_json = { + "@id": "assay_001", + "comments": [ + { + "@id": "comment_001", + "name": "target repository", + "value": TargetRepository.ENA, + } + ], + } + + 
invalid_assay_json = { + "@id": "assay_002", + "comments": [ + { + "@id": "comment_002", + "name": "target repository", + "value": "my special repo", + } + ], + } + + second_invalid_assay_json = {"@id": "assay_003", "comments": []} + + third_invalid_assay_json = { + "@id": "assay_004", + "comments": [ + { + "@id": "comment_003", + "name": "target repository", + "value": TargetRepository.ENA, + }, + { + "@id": "comment_004", + "name": "target repository", + "value": TargetRepository.METABOLIGHTS, + }, + ], + } + + assert Assay.model_validate(valid_assay_json) + with pytest.raises( + ValidationError, match="Invalid 'target repository' value: 'my special repo'" + ): + Assay.model_validate(invalid_assay_json) + + with pytest.raises(ValidationError, match="'target repository' comment is missing"): + Assay.model_validate(second_invalid_assay_json) + + with pytest.raises( + ValidationError, match="Multiple 'target repository' comments found" + ): + Assay.model_validate(third_invalid_assay_json) + + def test_person_phone_nr_validator(): + valid_person_json = { + "@id": "person_001", + "phone_nr": "+49123456789", + } + + invalid_person_json = { + "@id": "person_002", + "phone_nr": "123456789", + } + + assert Person.model_validate(valid_person_json) + + with pytest.raises(ValidationError, match="Invalid number format"): + Person.model_validate(invalid_person_json) From 6120d68676be161a4f58ae515793b705d42602f6 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 08:58:22 +0200 Subject: [PATCH 23/45] linting --- mars-cli/mars_lib/isa_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index 8388abe..6dfdb74 100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -2,7 +2,7 @@ from typing import Dict, Union, List from mars_lib.schemas.model import Investigation, Assay, Comment from pydantic import ValidationError -from mars_lib.target_repo import 
TARGET_REPO_KEY, TargetRepository +from mars_lib.target_repo import TARGET_REPO_KEY def reduce_isa_json_for_target_repo( From 01f4e1aefa89f9bfb0838b5686353ef3c1b20443 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 14:42:33 +0200 Subject: [PATCH 24/45] Modify function to write different levels --- mars-cli/mars_cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 69537a5..809eb61 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -35,9 +35,16 @@ ) -def print_and_log(msg): +def print_and_log(msg, level="info"): click.echo(msg) - logging.info(msg) + if level == "info": + logging.info(msg) + elif level == "error": + logging.error(msg) + elif level == "warning": + logging.warning(msg) + else: + logging.debug(msg) @click.group() From 839f71bdba0acab18452777ae1def6691e76104a Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 14:43:15 +0200 Subject: [PATCH 25/45] Fix import statements --- mars-cli/mars_cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 809eb61..4224270 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -1,9 +1,8 @@ -from math import log import click import logging import pathlib from configparser import ConfigParser -from mars_lib.isa_json import TargetRepository +from mars_lib.target_repo import TargetRepository from logging.handlers import RotatingFileHandler # Load CLI configuration From 1c4462950847d77f4ada9ef88708972cb96eeb65 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 14:43:37 +0200 Subject: [PATCH 26/45] Add context to CLI --- mars-cli/mars_cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 4224270..9bf97e1 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -52,11 +52,15 @@ def print_and_log(msg, 
level="info"): is_flag=True, help="Boolean indicating the usage of the development environment of the target repositories. If not present, the production instances will be used.", ) -def cli(development): +@click.pass_context +def cli(ctx, development): print_and_log( f"Running in {'Development environment' if development else 'Production environment'}" ) + ctx.ensure_object(dict) + ctx.obj["DEVELOPMENT"] = development + @cli.command() @click.argument( @@ -87,9 +91,12 @@ def submit(credentials_file, isa_json_file, submit_to_ena, submit_to_metabolight f"Staring submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}." ) + # TODO: Entry point for the submission logic + @cli.command() -def health_check(): +@click.pass_context +def health_check(ctx): """Check the health of the target repositories.""" print_and_log("Checking the health of the target repositories.") From 81f04fe78d7b21aabe3a1a3116fedac69a9c5ce2 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Tue, 16 Apr 2024 14:51:24 +0200 Subject: [PATCH 27/45] Add URLs to the config file --- mars-cli/generate_config.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mars-cli/generate_config.py b/mars-cli/generate_config.py index 7bff30b..0f243a3 100644 --- a/mars-cli/generate_config.py +++ b/mars-cli/generate_config.py @@ -22,6 +22,27 @@ def create_settings_file(settings_dir): "log_max_files": "5", } + config["webin"] = { + "development-url": "https://wwwdev.ebi.ac.uk/ena/submit/webin/auth", + "development-token-url": "https://wwwdev.ebi.ac.uk/ena/submit/webin/auth/token", + "production-url": "https://www.ebi.ac.uk/ena/submit/webin/auth", + "production-token-url": "https://www.ebi.ac.uk/ena/submit/webin/auth/token", + } + + config["ena"] = { + "development-url": "https://wwwdev.ebi.ac.uk/ena/submit/webin-v2/", + "development-submission-url": "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA", + "production-url": 
"https://www.ebi.ac.uk/ena/submit/webin-v2/", + "production-submission-url": "https://www.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA", + } + + config["biosamples"] = { + "development-url": "https://wwwdev.ebi.ac.uk/biosamples/samples/", + "development-submission-url": "https://wwwdev.ebi.ac.uk/biosamples/samples/", + "production-url": "https://www.ebi.ac.uk/biosamples/samples/", + "production-submission-url": "https://www.ebi.ac.uk/biosamples/samples/", + } + with open(settings_path, "w") as config_file: config.write(config_file) From 906568c387b6ca74124aea4414eec7032af160f8 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 14:52:49 +0200 Subject: [PATCH 28/45] Change settings dir dependend of environmental variable. --- mars-cli/generate_config.py | 9 +++++++-- mars-cli/mars_cli.py | 9 ++++++++- mars-cli/setup.py | 3 ++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/mars-cli/generate_config.py b/mars-cli/generate_config.py index 0f243a3..99e0f66 100644 --- a/mars-cli/generate_config.py +++ b/mars-cli/generate_config.py @@ -47,14 +47,19 @@ def create_settings_file(settings_dir): config.write(config_file) -def generate_config(overwrite): +def generate_config(overwrite, mars_home_dir): """ Generate the configuration file for the MARS CLI. 
Returns: None """ - settings_dir = pathlib.Path.home() / ".mars" + settings_dir = ( + pathlib.Path.home() / ".mars" + if mars_home_dir == "HOME" + else pathlib.Path(mars_home_dir) / ".mars" + ) + if not settings_dir.exists(): settings_dir.mkdir() diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 9bf97e1..f8c47f1 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -4,9 +4,16 @@ from configparser import ConfigParser from mars_lib.target_repo import TargetRepository from logging.handlers import RotatingFileHandler +import sys +import os # Load CLI configuration -home_dir = pathlib.Path.home() +home_dir = ( + pathlib.Path(os.getenv("MARS_SETTINGS_DIR")) + if os.getenv("MARS_SETTINGS_DIR") + else pathlib.Path.home() +) + config_file = home_dir / ".mars" / "settings.ini" fallback_log_file = home_dir / ".mars" / "app.log" diff --git a/mars-cli/setup.py b/mars-cli/setup.py index 4fae8dd..6dd6f34 100644 --- a/mars-cli/setup.py +++ b/mars-cli/setup.py @@ -18,7 +18,8 @@ def run(self): install.run(self) overwrite_settings = os.getenv("OVERWRITE_SETTINGS", "False").lower() == "true" - generate_config(overwrite_settings) + mars_home_dir = os.getenv("MARS_SETTINGS_DIR", "HOME") + generate_config(overwrite_settings, mars_home_dir) setup( From e851e4d275b7fc764c8ccfaf9ffaad4204ddd3c6 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 14:53:43 +0200 Subject: [PATCH 29/45] Print to std err when error is logged --- mars-cli/mars_cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index f8c47f1..77bf998 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -42,14 +42,17 @@ def print_and_log(msg, level="info"): - click.echo(msg) if level == "info": + click.echo(msg) logging.info(msg) elif level == "error": + click.echo(msg, file=sys.stderr) logging.error(msg) elif level == "warning": + click.echo(msg) logging.warning(msg) else: + click.echo(msg) 
logging.debug(msg) From 51f176ea9a6cc3e8b183a3887ea7665a42a71b5d Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 14:58:58 +0200 Subject: [PATCH 30/45] Add health check command --- mars-cli/mars_cli.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 77bf998..cc5193d 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -4,6 +4,7 @@ from configparser import ConfigParser from mars_lib.target_repo import TargetRepository from logging.handlers import RotatingFileHandler +import requests import sys import os @@ -59,11 +60,13 @@ def print_and_log(msg, level="info"): @click.group() @click.option( "--development", + "-d", is_flag=True, help="Boolean indicating the usage of the development environment of the target repositories. If not present, the production instances will be used.", ) @click.pass_context def cli(ctx, development): + print_and_log("############# Welcome to the MARS CLI. #############") print_and_log( f"Running in {'Development environment' if development else 'Production environment'}" ) @@ -110,6 +113,47 @@ def health_check(ctx): """Check the health of the target repositories.""" print_and_log("Checking the health of the target repositories.") + if ctx.obj["DEVELOPMENT"]: + print_and_log("Checking development instances.") + webin_url = config.get("webin", "development-url") + ena_url = config.get("ena", "development-url") + biosamples_url = config.get("biosamples", "development-url") + else: + print_and_log("Checking production instances.") + webin_url = config.get("webin", "production-url") + ena_url = config.get("ena", "production-url") + biosamples_url = config.get("biosamples", "production-url") + + # Check webin service + webin_health = requests.get(webin_url) + if webin_health.status_code != 200: + print_and_log( + f"Webin ({webin_url}): Could not reach service! 
Status code '{webin_health.status_code}'.", + level="error", + ) + else: + print_and_log(f"Webin ({webin_url}) is healthy.") + + # Check ENA service + ena_health = requests.get(ena_url) + if ena_health.status_code != 200: + print_and_log( + f"ENA ({ena_url}): Could not reach service! Status code '{ena_health.status_code}'.", + level="error", + ) + else: + print_and_log(f"ENA ({ena_url}) is healthy.") + + # Check Biosamples service + biosamples_health = requests.get(biosamples_url) + if biosamples_health.status_code != 200: + print_and_log( + f"Biosamples ({biosamples_url}): Could not reach service! Status code '{biosamples_health.status_code}'.", + level="error", + ) + else: + print_and_log(f"Biosamples ({biosamples_url}) is healthy.") + if __name__ == "__main__": cli() From 44e7ad934f3b099fa5012bbc41333b43b38da121 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 15:06:37 +0200 Subject: [PATCH 31/45] Add IsaJson wrapper around Investigation --- mars-cli/mars_lib/schemas/model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/schemas/model.py index 713657b..45951b1 100644 --- a/mars-cli/mars_lib/schemas/model.py +++ b/mars-cli/mars_lib/schemas/model.py @@ -296,3 +296,7 @@ class Investigation(IsaBase): studies: Optional[List[Study]] = Field(default=[]) submissionDate: Optional[str] = Field(default=None) title: Optional[str] = Field(default=None) + + +class IsaJson(IsaBase): + investigation: Investigation From e474ea18e75505cd13fa9313cb643ac68db3fdc9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 15:23:04 +0200 Subject: [PATCH 32/45] Formatting --- test-data/biosamples-modified-isa.json | 1839 +++++++++++++----------- 1 file changed, 992 insertions(+), 847 deletions(-) diff --git a/test-data/biosamples-modified-isa.json b/test-data/biosamples-modified-isa.json index 80f9319..efe1cac 100644 --- a/test-data/biosamples-modified-isa.json +++ 
b/test-data/biosamples-modified-isa.json @@ -1,877 +1,1022 @@ { - "investigation" : { - "identifier" : "", - "title" : "Bob's investigation", - "description" : "", - "submissionDate" : "", - "publicReleaseDate" : "", - "ontologySourceReferences" : [ ], - "filename" : "Bob's investigation.txt", - "comments" : [ { - "name" : "ISAjson export time", - "value" : "2022-11-07T08:09:59Z" - }, { - "name" : "SEEK Project name", - "value" : "Bob's PhD project" - }, { - "name" : "SEEK Project ID", - "value" : "http://localhost:3000/single_pages/2" - }, { - "name" : "SEEK Investigation ID", - "value" : "19" - } ], - "publications" : [ ], - "people" : [ { - "lastName" : "Bob", - "firstName" : "Bob", - "midInitials" : "", - "email" : "bob@testing.com", - "phone" : "", - "fax" : "", - "address" : "", - "affiliation" : "", - "roles" : [ { - "termAccession" : "", - "termSource" : "", - "annotationValue" : "" - } ], - "comments" : [ { - "name" : "", - "value" : "", - "@id" : "" - } ], - "@id" : "#people/5" - } ], - "studies" : [ { - "identifier" : "", - "title" : "Arabidopsis thaliana", - "description" : "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", - "submissionDate" : "", - "publicReleaseDate" : "", - "filename" : "Arabidopsis thaliana.txt", - "comments" : [ { - "name" : "SEEK Study ID", - "value" : "10" - }, { - "name" : "SEEK creation date", - "value" : "2022-11-03T16:20:49Z" - } ], - "publications" : [ ], - "people" : [ { - "lastName" : "Bob", - "firstName" : "Bob", - "midInitials" : "", - "email" : "bob@testing.com", - "phone" : "", - "fax" : "", - "address" : "", - "affiliation" : "", - "roles" : [ { - "termAccession" : "", - "termSource" : "", - "annotationValue" : "" - } ], - "comments" : [ { - "name" : "", - "value" : "", - "@id" : "" - } ], - "@id" : "#people/5" - } ], - "studyDesignDescriptors" : [ ], - "characteristicCategories" : [ { - "characteristicType" : { - 
"annotationValue" : "Title", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/Title_317" - }, { - "characteristicType" : { - "annotationValue" : "Description", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/Description_318" - }, { - "characteristicType" : { - "annotationValue" : "tax_id", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/tax_id_319" - }, { - "characteristicType" : { - "annotationValue" : "organism", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/organism_320" - }, { - "characteristicType" : { - "annotationValue" : "cell_type", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/cell_type_321" - }, { - "characteristicType" : { - "annotationValue" : "dev_stage", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/dev_stage_322" - }, { - "characteristicType" : { - "annotationValue" : "collection_date", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/collection_date_323" - }, { - "characteristicType" : { - "annotationValue" : "isolation_source", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/isolation_source_324" - }, { - "characteristicType" : { - "annotationValue" : "collected_by", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/collected_by_325" - }, { - "characteristicType" : { - "annotationValue" : "geographic location (country and/or sea)", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/geographic_location_(country_and/or_sea)_326" - }, { - "characteristicType" : { - "annotationValue" : "submission date", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/submission_date_327" - }, { - "characteristicType" : { - "annotationValue" : "status", - 
"termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/status_328" - }, { - "characteristicType" : { - "annotationValue" : "accession", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/accession_329" - } ], - "materials" : { - "sources" : [ { - "name" : "plant 1", - "characteristics" : [ { - "category" : { - "@id" : "#characteristic_category/Title_317" - }, - "value" : { - "annotationValue" : "plant 1", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/Description_318" - }, - "value" : { - "annotationValue" : "plant in the lab", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/tax_id_319" - }, - "value" : { - "annotationValue" : "NCBI:txid3702", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/organism_320" - }, - "value" : { - "annotationValue" : "Arabidopsis thaliana", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/cell_type_321" - }, - "value" : { - "annotationValue" : "na", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/dev_stage_322" - }, - "value" : { - "annotationValue" : "budding", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/collection_date_323" - }, - 
"value" : { - "annotationValue" : "01/01/2022", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/isolation_source_324" - }, - "value" : { - "annotationValue" : "seed", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/collected_by_325" + "investigation": { + "identifier": "", + "title": "Bob's investigation", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Bob's investigation.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2022-11-07T08:09:59Z" + }, + { + "name": "SEEK Project name", + "value": "Bob's PhD project" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/2" + }, + { + "name": "SEEK Investigation ID", + "value": "19" + } + ], + "publications": [], + "people": [ + { + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "name": "", + "value": "", + "@id": "" + } + ], + "@id": "#people/5" + } + ], + "studies": [ + { + "identifier": "", + "title": "Arabidopsis thaliana", + "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "Arabidopsis thaliana.txt", + "comments": [ + { + "name": "SEEK Study ID", + "value": "10" + }, + { + "name": "SEEK creation date", + "value": "2022-11-03T16:20:49Z" + } + ], + "publications": [], + "people": [ + { + "lastName": "Bob", + 
"firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "name": "", + "value": "", + "@id": "" + } + ], + "@id": "#people/5" + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" + }, + "@id": "#characteristic_category/Title_317" + }, + { + "characteristicType": { + "annotationValue": "Description", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "Bob", - "termSource" : "", - "termAccession" : "" + "@id": "#characteristic_category/Description_318" + }, + { + "characteristicType": { + "annotationValue": "tax_id", + "termAccession": "", + "termSource": "" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/geographic_location_(country_and/or_sea)_326" + "@id": "#characteristic_category/tax_id_319" + }, + { + "characteristicType": { + "annotationValue": "organism", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "Belgium", - "termSource" : "", - "termAccession" : "" + "@id": "#characteristic_category/organism_320" + }, + { + "characteristicType": { + "annotationValue": "cell_type", + "termAccession": "", + "termSource": "" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/submission_date_327" + "@id": "#characteristic_category/cell_type_321" + }, + { + "characteristicType": { + "annotationValue": "dev_stage", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" + "@id": "#characteristic_category/dev_stage_322" + }, + { + 
"characteristicType": { + "annotationValue": "collection_date", + "termAccession": "", + "termSource": "" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/status_328" + "@id": "#characteristic_category/collection_date_323" + }, + { + "characteristicType": { + "annotationValue": "isolation_source", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" + "@id": "#characteristic_category/isolation_source_324" + }, + { + "characteristicType": { + "annotationValue": "collected_by", + "termAccession": "", + "termSource": "" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/accession_329" + "@id": "#characteristic_category/collected_by_325" + }, + { + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326" + }, + { + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/accession" + "@id": "#characteristic_category/submission_date_327" + }, + { + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "SAMEA130788488" - } - } ], - "@id" : "#source/330" - } ], - "samples" : [ { - "name" : "leaf 1", - "derivesFrom" : [ { - "@id" : "#source/330" - } ], - "characteristics" : [ { - "category" : { - "@id" : "#characteristic_category/accession" + "@id": "#characteristic_category/status_328" + }, + { + 
"characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" }, - "value" : { - "annotationValue" : "SAMEA130788489" + "@id": "#characteristic_category/accession_329" + } + ], + "materials": { + "sources": [ + { + "name": "plant 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/Title_317" + }, + "value": { + "annotationValue": "plant 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/Description_318" + }, + "value": { + "annotationValue": "plant in the lab", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/tax_id_319" + }, + "value": { + "annotationValue": "NCBI:txid3702", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/organism_320" + }, + "value": { + "annotationValue": "Arabidopsis thaliana", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/cell_type_321" + }, + "value": { + "annotationValue": "na", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/dev_stage_322" + }, + "value": { + "annotationValue": "budding", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_323" + }, + "value": { + "annotationValue": "01/01/2022", + "termSource": "", + "termAccession": "" + }, + "unit": { + 
"termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/isolation_source_324" + }, + "value": { + "annotationValue": "seed", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collected_by_325" + }, + "value": { + "annotationValue": "Bob", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326" + }, + "value": { + "annotationValue": "Belgium", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_327" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_328" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_329" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession" + }, + "value": { + "annotationValue": "SAMEA130788488" + } + } + ], + "@id": "#source/330" } - } ], - "factorValues" : [ { - "category" : { - "@id" : "" - }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] + ], + "samples": [ + { + "name": 
"leaf 1", + "derivesFrom": [ + { + "@id": "#source/330" + } + ], + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/accession" + }, + "value": { + "annotationValue": "SAMEA130788489" + } + } + ], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "@id": "#sample/331" } - } ], - "@id" : "#sample/331" - } ] - }, - "protocols" : [ { - "name" : "sample collection", - "protocolType" : { - "annotationValue" : "sample collection", - "termAccession" : "", - "termSource" : "" + ] }, - "description" : "", - "uri" : "", - "version" : "", - "parameters" : [ ], - "components" : [ { - "componentName" : "", - "componentType" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - } - } ], - "@id" : "#protocol/18_10" - }, { - "name" : "nucleic acid extraction", - "protocolType" : { - "annotationValue" : "nucleic acid extraction", - "termAccession" : "", - "termSource" : "" - }, - "description" : "", - "uri" : "", - "version" : "", - "parameters" : [ ], - "components" : [ { - "componentName" : "", - "componentType" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - } - } ], - "@id" : "#protocol/19_18" - }, { - "name" : "library construction", - "protocolType" : { - "annotationValue" : "library construction", - "termAccession" : "", - "termSource" : "" - }, - "description" : "", - "uri" : "", - "version" : "", - "parameters" : [ { - "parameterName" : { - "annotationValue" : "library_construction_protocol", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/349" - }, { - "parameterName" : { - "annotationValue" : "design_description", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/351" - }, { - "parameterName" : { - "annotationValue" : "library source", - "termAccession" : "", - "termSource" : "" 
- }, - "@id" : "#parameter/352" - }, { - "parameterName" : { - "annotationValue" : "library strategy", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/353" - }, { - "parameterName" : { - "annotationValue" : "library selection", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/354" - }, { - "parameterName" : { - "annotationValue" : "library layout", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/355" - }, { - "parameterName" : { - "annotationValue" : "insert size", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/356" - } ], - "components" : [ { - "componentName" : "", - "componentType" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - } - } ], - "@id" : "#protocol/20_20" - }, { - "name" : "nucleic acid sequencing", - "protocolType" : { - "annotationValue" : "nucleic acid sequencing", - "termAccession" : "", - "termSource" : "" - }, - "description" : "", - "uri" : "", - "version" : "", - "parameters" : [ { - "parameterName" : { - "annotationValue" : "sequencing instrument", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#parameter/363" - } ], - "components" : [ { - "componentName" : "", - "componentType" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - } - } ], - "@id" : "#protocol/21_21" - } ], - "processSequence" : [ { - "name" : "", - "executesProtocol" : { - "@id" : "#protocol/18_10" - }, - "parameterValues" : [ ], - "performer" : "", - "date" : "", - "previousProcess" : { }, - "nextProcess" : { }, - "inputs" : [ { - "@id" : "#source/330" - } ], - "outputs" : [ { - "@id" : "#sample/331" - } ], - "@id" : "#process/sample_collection/331" - } ], - "assays" : [ { - "filename" : "a_assays.txt", - "measurementType" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - }, - "technologyType" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" - }, - "technologyPlatform" : 
"", - "characteristicCategories" : [ { - "characteristicType" : { - "annotationValue" : "Title", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/Title_350" - }, { - "characteristicType" : { - "annotationValue" : "submission date", - "termAccession" : "", - "termSource" : "" - }, - "@id" : "#characteristic_category/submission_date_358" - }, { - "characteristicType" : { - "annotationValue" : "status", - "termAccession" : "", - "termSource" : "" + "protocols": [ + { + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ], + "@id": "#protocol/18_10" }, - "@id" : "#characteristic_category/status_359" - }, { - "characteristicType" : { - "annotationValue" : "accession", - "termAccession" : "", - "termSource" : "" + { + "name": "nucleic acid extraction", + "protocolType": { + "annotationValue": "nucleic acid extraction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ], + "@id": "#protocol/19_18" }, - "@id" : "#characteristic_category/accession_360" - } ], - "materials" : { - "samples" : [ { - "@id" : "#sample/331" - } ], - "otherMaterials" : [ { - "name" : "extract 1", - "type" : "Extract Name", - "characteristics" : [ ], - "derivesFrom" : [ { - "@id" : "#sample/331" - } ], - "@id" : "#other_material/332" - }, { - "name" : "library 1", - "type" : "library name", - "characteristics" : [ { - "category" : { - "@id" : "#characteristic_category/Title_350" + { + "name": "library construction", + "protocolType": { + "annotationValue": 
"library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/349" }, - "value" : { - "annotationValue" : "library 1", - "termSource" : "", - "termAccession" : "" + { + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/351" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#characteristic_category/submission_date_358" + { + "parameterName": { + "annotationValue": "library source", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/352" }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" + { + "parameterName": { + "annotationValue": "library strategy", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/353" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] + { + "parameterName": { + "annotationValue": "library selection", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/354" + }, + { + "parameterName": { + "annotationValue": "library layout", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/355" + }, + { + "parameterName": { + "annotationValue": "insert size", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/356" } - }, { - "category" : { - "@id" : "#characteristic_category/status_359" + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ], + "@id": "#protocol/20_20" + }, + { + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + 
"uri": "", + "version": "", + "parameters": [ + { + "parameterName": { + "annotationValue": "sequencing instrument", + "termAccession": "", + "termSource": "" + }, + "@id": "#parameter/363" + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ], + "@id": "#protocol/21_21" + } + ], + "processSequence": [ + { + "name": "", + "executesProtocol": { + "@id": "#protocol/18_10" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/330" + } + ], + "outputs": [ + { + "@id": "#sample/331" + } + ], + "@id": "#process/sample_collection/331" + } + ], + "assays": [ + { + "filename": "a_assays.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyPlatform": "", + "characteristicCategories": [ + { + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" + }, + "@id": "#characteristic_category/Title_350" }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" + { + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + }, + "@id": "#characteristic_category/submission_date_358" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] + { + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + }, + "@id": "#characteristic_category/status_359" + }, + { + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + }, + "@id": "#characteristic_category/accession_360" } - }, { - "category" : { - "@id" : "#characteristic_category/accession_360" + ], + "materials": { + "samples": [ + { + "@id": "#sample/331" + } + ], + 
"otherMaterials": [ + { + "name": "extract 1", + "type": "Extract Name", + "characteristics": [], + "derivesFrom": [ + { + "@id": "#sample/331" + } + ], + "@id": "#other_material/332" + }, + { + "name": "library 1", + "type": "library name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/Title_350" + }, + "value": { + "annotationValue": "library 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_358" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_359" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_360" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#other_material/332" + } + ], + "@id": "#other_material/333" + } + ] + }, + "processSequence": [ + { + "name": "", + "executesProtocol": { + "@id": "#protocol/19_18" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/332" + }, + "nextProcess": { + "@id": "#process/library_construction/332" + }, + "inputs": [ + { + "@id": "#sample/331" + } + ], + "outputs": [ + { + "@id": "#other_material/332" + } + ], + "@id": "#process/nucleic_acid_extraction/332" }, - "value" : { - "annotationValue" : "", - "termSource" : "", - "termAccession" : "" + { + "name": "", + "executesProtocol": { + "@id": "#protocol/20_20" + }, + "parameterValues": [ + { + 
"category": { + "@id": "#parameter/349" + }, + "value": { + "annotationValue": "lib prep", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/351" + }, + "value": { + "annotationValue": "Test", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/352" + }, + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/353" + }, + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/354" + }, + "value": { + "annotationValue": "RT-PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/355" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/356" + }, + "value": { + "annotationValue": "100", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/nucleic_acid_extraction/333" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/333" + }, + "inputs": [ + { + "@id": "#other_material/332" + } + ], + "outputs": [ + { + "@id": "#other_material/333" + } + ], + "@id": "#process/library_construction/333" }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] + { + "name": "", + 
"executesProtocol": { + "@id": "#protocol/21_21" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/363" + }, + "value": { + "annotationValue": " MinION", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/334" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/333" + } + ], + "outputs": [ + { + "@id": "#data_file/334" + } + ], + "@id": "#process/nucleic_acid_sequencing/334" } - } ], - "derivesFrom" : [ { - "@id" : "#other_material/332" - } ], - "@id" : "#other_material/333" - } ] - }, - "processSequence" : [ { - "name" : "", - "executesProtocol" : { - "@id" : "#protocol/19_18" - }, - "parameterValues" : [ ], - "performer" : "", - "date" : "", - "previousProcess" : { - "@id" : "#process/sample_collection/332" - }, - "nextProcess" : { - "@id" : "#process/library_construction/332" - }, - "inputs" : [ { - "@id" : "#sample/331" - } ], - "outputs" : [ { - "@id" : "#other_material/332" - } ], - "@id" : "#process/nucleic_acid_extraction/332" - }, { - "name" : "", - "executesProtocol" : { - "@id" : "#protocol/20_20" - }, - "parameterValues" : [ { - "category" : { - "@id" : "#parameter/349" - }, - "value" : { - "annotationValue" : "lib prep", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#parameter/351" - }, - "value" : { - "annotationValue" : "Test", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#parameter/352" - }, - "value" : { - "annotationValue" : "OTHER", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : 
"#parameter/353" - }, - "value" : { - "annotationValue" : "OTHER", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#parameter/354" - }, - "value" : { - "annotationValue" : "RT-PCR", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#parameter/355" - }, - "value" : { - "annotationValue" : "SINGLE", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - }, { - "category" : { - "@id" : "#parameter/356" - }, - "value" : { - "annotationValue" : "100", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - } ], - "performer" : "", - "date" : "", - "previousProcess" : { - "@id" : "#process/nucleic_acid_extraction/333" - }, - "nextProcess" : { - "@id" : "#process/nucleic_acid_sequencing/333" - }, - "inputs" : [ { - "@id" : "#other_material/332" - } ], - "outputs" : [ { - "@id" : "#other_material/333" - } ], - "@id" : "#process/library_construction/333" - }, { - "name" : "", - "executesProtocol" : { - "@id" : "#protocol/21_21" - }, - "parameterValues" : [ { - "category" : { - "@id" : "#parameter/363" - }, - "value" : { - "annotationValue" : " MinION", - "termSource" : "", - "termAccession" : "" - }, - "unit" : { - "termSource" : "", - "termAccession" : "", - "comments" : [ ] - } - } ], - "performer" : "", - "date" : "", - "previousProcess" : { - "@id" : "#process/library_construction/334" - }, - "nextProcess" : { }, - "inputs" : [ { - "@id" : "#other_material/333" - } ], - "outputs" : [ { - "@id" : "#data_file/334" - } ], - "@id" : "#process/nucleic_acid_sequencing/334" - } ], - "dataFiles" : [ { - "name" : "fake2.bam", - "type" : "Raw Data File", - "comments" : [ { - "name" : "file type", - 
"value" : "bam" - }, { - "name" : "file checksum", - "value" : "9840f585055afc37de353706fd31a377" - }, { - "name" : "submission date", - "value" : "" - }, { - "name" : "status", - "value" : "" - }, { - "name" : "accession", - "value" : "" - } ], - "@id" : "#data/334" - } ], - "unitCategories" : [ ], - "@id" : "#assay/18_20_21" - } ], - "factors" : [ ], - "unitCategories" : [ ] - } ] + ], + "dataFiles": [ + { + "name": "fake2.bam", + "type": "Raw Data File", + "comments": [ + { + "name": "file type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "9840f585055afc37de353706fd31a377" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ], + "@id": "#data/334" + } + ], + "unitCategories": [], + "@id": "#assay/18_20_21" + } + ], + "factors": [], + "unitCategories": [] + } + ] } -} +} \ No newline at end of file From 4c85ac1985aeaa2fd17cec720c26c893cf3017c9 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 15:25:00 +0200 Subject: [PATCH 33/45] Invalid material type according to the specs: https://isa-specs.readthedocs.io/en/latest/isajson.html#material-attribute-schema-json --- test-data/biosamples-input-isa.json | 2 +- test-data/biosamples-modified-isa.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test-data/biosamples-input-isa.json b/test-data/biosamples-input-isa.json index ed6a8f8..687b23b 100644 --- a/test-data/biosamples-input-isa.json +++ b/test-data/biosamples-input-isa.json @@ -744,7 +744,7 @@ { "@id":"#other_material/333", "name":"library 1", - "type":"library name", + "type": "Extract Name", "characteristics":[ { "category":{ diff --git a/test-data/biosamples-modified-isa.json b/test-data/biosamples-modified-isa.json index efe1cac..b71ad8a 100644 --- a/test-data/biosamples-modified-isa.json +++ b/test-data/biosamples-modified-isa.json @@ -709,7 +709,7 @@ }, { "name": "library 1", - "type": "library 
name", + "type": "Extract Name", "characteristics": [ { "category": { From 359c502c6eb57b019f9cfde743d0361cfb5d3e1a Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 15:26:39 +0200 Subject: [PATCH 34/45] Formatting --- test-data/biosamples-input-isa.json | 1414 +++++++++++++-------------- 1 file changed, 665 insertions(+), 749 deletions(-) diff --git a/test-data/biosamples-input-isa.json b/test-data/biosamples-input-isa.json index 687b23b..bbd6121 100644 --- a/test-data/biosamples-input-isa.json +++ b/test-data/biosamples-input-isa.json @@ -1,1089 +1,1005 @@ { - "investigation":{ - "identifier":"", - "title":"Bob's investigation", - "description":"", - "submissionDate":"", - "publicReleaseDate":"", - "ontologySourceReferences":[ - - ], - "filename":"Bob's investigation.txt", - "comments":[ + "investigation": { + "identifier": "", + "title": "Bob's investigation", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Bob's investigation.txt", + "comments": [ { - "name":"ISAjson export time", - "value":"2022-11-07T08:09:59Z" + "name": "ISAjson export time", + "value": "2022-11-07T08:09:59Z" }, { - "name":"SEEK Project name", - "value":"Bob's PhD project" + "name": "SEEK Project name", + "value": "Bob's PhD project" }, { - "name":"SEEK Project ID", - "value":"http://localhost:3000/single_pages/2" + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/2" }, { - "name":"SEEK Investigation ID", - "value":"19" + "name": "SEEK Investigation ID", + "value": "19" } ], - "publications":[ - - ], - "people":[ + "publications": [], + "people": [ { - "@id":"#people/5", - "lastName":"Bob", - "firstName":"Bob", - "midInitials":"", - "email":"bob@testing.com", - "phone":"", - "fax":"", - "address":"", - "affiliation":"", - "roles":[ + "@id": "#people/5", + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", 
+ "address": "", + "affiliation": "", + "roles": [ { - "termAccession":"", - "termSource":"", - "annotationValue":"" + "termAccession": "", + "termSource": "", + "annotationValue": "" } ], - "comments":[ + "comments": [ { - "@id":"", - "value":"", - "name":"" + "@id": "", + "value": "", + "name": "" } ] } ], - "studies":[ + "studies": [ { - "identifier":"", - "title":"Arabidopsis thaliana", - "description":"Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", - "submissionDate":"", - "publicReleaseDate":"", - "filename":"Arabidopsis thaliana.txt", - "comments":[ + "identifier": "", + "title": "Arabidopsis thaliana", + "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "Arabidopsis thaliana.txt", + "comments": [ { - "name":"SEEK Study ID", - "value":"10" + "name": "SEEK Study ID", + "value": "10" }, { - "name":"SEEK creation date", - "value":"2022-11-03T16:20:49Z" + "name": "SEEK creation date", + "value": "2022-11-03T16:20:49Z" } ], - "publications":[ - - ], - "people":[ + "publications": [], + "people": [ { - "@id":"#people/5", - "lastName":"Bob", - "firstName":"Bob", - "midInitials":"", - "email":"bob@testing.com", - "phone":"", - "fax":"", - "address":"", - "affiliation":"", - "roles":[ + "@id": "#people/5", + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ { - "termAccession":"", - "termSource":"", - "annotationValue":"" + "termAccession": "", + "termSource": "", + "annotationValue": "" } ], - "comments":[ + "comments": [ { - "@id":"", - "value":"", - "name":"" + "@id": "", + "value": "", + "name": "" } ] } ], - "studyDesignDescriptors":[ - - ], - 
"characteristicCategories":[ + "studyDesignDescriptors": [], + "characteristicCategories": [ { - "@id":"#characteristic_category/Title_317", - "characteristicType":{ - "annotationValue":"Title", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/Title_317", + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/Description_318", - "characteristicType":{ - "annotationValue":"Description", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/Description_318", + "characteristicType": { + "annotationValue": "Description", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/tax_id_319", - "characteristicType":{ - "annotationValue":"tax_id", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/tax_id_319", + "characteristicType": { + "annotationValue": "tax_id", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/organism_320", - "characteristicType":{ - "annotationValue":"organism", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/organism_320", + "characteristicType": { + "annotationValue": "organism", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/cell_type_321", - "characteristicType":{ - "annotationValue":"cell_type", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/cell_type_321", + "characteristicType": { + "annotationValue": "cell_type", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/dev_stage_322", - "characteristicType":{ - "annotationValue":"dev_stage", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/dev_stage_322", + "characteristicType": { + "annotationValue": "dev_stage", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/collection_date_323", - "characteristicType":{ 
- "annotationValue":"collection_date", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/collection_date_323", + "characteristicType": { + "annotationValue": "collection_date", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/isolation_source_324", - "characteristicType":{ - "annotationValue":"isolation_source", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/isolation_source_324", + "characteristicType": { + "annotationValue": "isolation_source", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/collected_by_325", - "characteristicType":{ - "annotationValue":"collected_by", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/collected_by_325", + "characteristicType": { + "annotationValue": "collected_by", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/geographic_location_(country_and/or_sea)_326", - "characteristicType":{ - "annotationValue":"geographic location (country and/or sea)", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/submission_date_327", - "characteristicType":{ - "annotationValue":"submission date", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/submission_date_327", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/status_328", - "characteristicType":{ - "annotationValue":"status", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/status_328", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" } }, { - 
"@id":"#characteristic_category/accession_329", - "characteristicType":{ - "annotationValue":"accession", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/accession_329", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" } } ], - "materials":{ - "sources":[ + "materials": { + "sources": [ { - "@id":"#source/330", - "name":"plant 1", - "characteristics":[ + "@id": "#source/330", + "name": "plant 1", + "characteristics": [ { - "category":{ - "@id":"#characteristic_category/Title_317" - }, - "value":{ - "annotationValue":"plant 1", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/Title_317" + }, + "value": { + "annotationValue": "plant 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/Description_318" - }, - "value":{ - "annotationValue":"plant in the lab", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/Description_318" + }, + "value": { + "annotationValue": "plant in the lab", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/tax_id_319" - }, - "value":{ - "annotationValue":"NCBI:txid3702", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/tax_id_319" + }, + "value": { + "annotationValue": "NCBI:txid3702", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - 
"@id":"#characteristic_category/organism_320" - }, - "value":{ - "annotationValue":"Arabidopsis thaliana", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/organism_320" + }, + "value": { + "annotationValue": "Arabidopsis thaliana", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/cell_type_321" - }, - "value":{ - "annotationValue":"na", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/cell_type_321" + }, + "value": { + "annotationValue": "na", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/dev_stage_322" - }, - "value":{ - "annotationValue":"budding", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/dev_stage_322" + }, + "value": { + "annotationValue": "budding", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/collection_date_323" - }, - "value":{ - "annotationValue":"01/01/2022", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/collection_date_323" + }, + "value": { + "annotationValue": "01/01/2022", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/isolation_source_324" - }, - "value":{ - 
"annotationValue":"seed", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/isolation_source_324" + }, + "value": { + "annotationValue": "seed", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/collected_by_325" - }, - "value":{ - "annotationValue":"Bob", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/collected_by_325" + }, + "value": { + "annotationValue": "Bob", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/geographic_location_(country_and/or_sea)_326" - }, - "value":{ - "annotationValue":"Belgium", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326" + }, + "value": { + "annotationValue": "Belgium", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/submission_date_327" - }, - "value":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/submission_date_327" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/status_328" - }, - "value":{ - "annotationValue":"", - "termSource":"", - 
"termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/status_328" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/accession_329" - }, - "value":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "#characteristic_category/accession_329" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } } ] } ], - "samples":[ + "samples": [ { - "@id":"#sample/331", - "name":"leaf 1", - "derivesFrom":[ + "@id": "#sample/331", + "name": "leaf 1", + "derivesFrom": [ { - "@id":"#source/330" + "@id": "#source/330" } ], - "characteristics":[ - - ], - "factorValues":[ + "characteristics": [], + "factorValues": [ { - "category":{ - "@id":"" - }, - "value":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" - }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } } ] } ] }, - "protocols":[ + "protocols": [ { - "@id":"#protocol/18_10", - "name":"sample collection", - "protocolType":{ - "annotationValue":"sample collection", - "termAccession":"", - "termSource":"" + "@id": "#protocol/18_10", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" }, - "description":"", - "uri":"", - "version":"", - "parameters":[ - - ], - "components":[ + "description": "", + "uri": "", + "version": "", + "parameters": [], + 
"components": [ { - "componentName":"", - "componentType":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" } } ] }, { - "@id":"#protocol/19_18", - "name":"nucleic acid extraction", - "protocolType":{ - "annotationValue":"nucleic acid extraction", - "termAccession":"", - "termSource":"" + "@id": "#protocol/19_18", + "name": "nucleic acid extraction", + "protocolType": { + "annotationValue": "nucleic acid extraction", + "termAccession": "", + "termSource": "" }, - "description":"", - "uri":"", - "version":"", - "parameters":[ - - ], - "components":[ + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ { - "componentName":"", - "componentType":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" } } ] }, { - "@id":"#protocol/20_20", - "name":"library construction", - "protocolType":{ - "annotationValue":"library construction", - "termAccession":"", - "termSource":"" + "@id": "#protocol/20_20", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" }, - "description":"", - "uri":"", - "version":"", - "parameters":[ + "description": "", + "uri": "", + "version": "", + "parameters": [ { - "@id":"#parameter/349", - "parameterName":{ - "annotationValue":"library_construction_protocol", - "termAccession":"", - "termSource":"" + "@id": "#parameter/349", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" } }, { - "@id":"#parameter/351", - "parameterName":{ - "annotationValue":"design_description", - "termAccession":"", - "termSource":"" + "@id": "#parameter/351", + "parameterName": { + "annotationValue": "design_description", + "termAccession": 
"", + "termSource": "" } }, { - "@id":"#parameter/352", - "parameterName":{ - "annotationValue":"library source", - "termAccession":"", - "termSource":"" + "@id": "#parameter/352", + "parameterName": { + "annotationValue": "library source", + "termAccession": "", + "termSource": "" } }, { - "@id":"#parameter/353", - "parameterName":{ - "annotationValue":"library strategy", - "termAccession":"", - "termSource":"" + "@id": "#parameter/353", + "parameterName": { + "annotationValue": "library strategy", + "termAccession": "", + "termSource": "" } }, { - "@id":"#parameter/354", - "parameterName":{ - "annotationValue":"library selection", - "termAccession":"", - "termSource":"" + "@id": "#parameter/354", + "parameterName": { + "annotationValue": "library selection", + "termAccession": "", + "termSource": "" } }, { - "@id":"#parameter/355", - "parameterName":{ - "annotationValue":"library layout", - "termAccession":"", - "termSource":"" + "@id": "#parameter/355", + "parameterName": { + "annotationValue": "library layout", + "termAccession": "", + "termSource": "" } }, { - "@id":"#parameter/356", - "parameterName":{ - "annotationValue":"insert size", - "termAccession":"", - "termSource":"" + "@id": "#parameter/356", + "parameterName": { + "annotationValue": "insert size", + "termAccession": "", + "termSource": "" } } ], - "components":[ + "components": [ { - "componentName":"", - "componentType":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" } } ] }, { - "@id":"#protocol/21_21", - "name":"nucleic acid sequencing", - "protocolType":{ - "annotationValue":"nucleic acid sequencing", - "termAccession":"", - "termSource":"" + "@id": "#protocol/21_21", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" }, - "description":"", - "uri":"", - "version":"", - 
"parameters":[ + "description": "", + "uri": "", + "version": "", + "parameters": [ { - "@id":"#parameter/363", - "parameterName":{ - "annotationValue":"sequencing instrument", - "termAccession":"", - "termSource":"" + "@id": "#parameter/363", + "parameterName": { + "annotationValue": "sequencing instrument", + "termAccession": "", + "termSource": "" } } ], - "components":[ + "components": [ { - "componentName":"", - "componentType":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" } } ] } ], - "processSequence":[ + "processSequence": [ { - "@id":"#process/sample_collection/331", - "name":"", - "executesProtocol":{ - "@id":"#protocol/18_10" - }, - "parameterValues":[ - - ], - "performer":"", - "date":"", - "previousProcess":{ - + "@id": "#process/sample_collection/331", + "name": "", + "executesProtocol": { + "@id": "#protocol/18_10" }, - "nextProcess":{ - - }, - "inputs":[ + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ { - "@id":"#source/330" + "@id": "#source/330" } ], - "outputs":[ + "outputs": [ { - "@id":"#sample/331" + "@id": "#sample/331" } ] } ], - "assays":[ + "assays": [ { - "@id":"#assay/18_20_21", - "filename":"a_assays.txt", - "measurementType":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "@id": "#assay/18_20_21", + "filename": "a_assays.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" }, - "technologyType":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" }, - "technologyPlatform":"", - "characteristicCategories":[ + "technologyPlatform": "", + "characteristicCategories": [ { - "@id":"#characteristic_category/Title_350", - "characteristicType":{ - "annotationValue":"Title", - 
"termAccession":"", - "termSource":"" + "@id": "#characteristic_category/Title_350", + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/submission_date_358", - "characteristicType":{ - "annotationValue":"submission date", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/submission_date_358", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/status_359", - "characteristicType":{ - "annotationValue":"status", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/status_359", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" } }, { - "@id":"#characteristic_category/accession_360", - "characteristicType":{ - "annotationValue":"accession", - "termAccession":"", - "termSource":"" + "@id": "#characteristic_category/accession_360", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" } } ], - "materials":{ - "samples":[ + "materials": { + "samples": [ { - "@id":"#sample/331" + "@id": "#sample/331" } ], - "otherMaterials":[ + "otherMaterials": [ { - "@id":"#other_material/332", - "name":"extract 1", - "type":"Extract Name", - "characteristics":[ - - ], - "derivesFrom":[ + "@id": "#other_material/332", + "name": "extract 1", + "type": "Extract Name", + "characteristics": [], + "derivesFrom": [ { - "@id":"#sample/331" + "@id": "#sample/331" } ] }, { - "@id":"#other_material/333", - "name":"library 1", + "@id": "#other_material/333", + "name": "library 1", "type": "Extract Name", - "characteristics":[ + "characteristics": [ { - "category":{ - "@id":"#characteristic_category/Title_350" + "category": { + "@id": "#characteristic_category/Title_350" }, - "value":{ - "annotationValue":"library 1", - "termSource":"", - "termAccession":"" + "value": { + 
"annotationValue": "library 1", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/submission_date_358" + "category": { + "@id": "#characteristic_category/submission_date_358" }, - "value":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/status_359" + "category": { + "@id": "#characteristic_category/status_359" }, - "value":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#characteristic_category/accession_360" + "category": { + "@id": "#characteristic_category/accession_360" }, - "value":{ - "annotationValue":"", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } } ], - "derivesFrom":[ + "derivesFrom": [ { - "@id":"#other_material/332" + "@id": "#other_material/332" } ] } ] }, - "processSequence":[ + "processSequence": [ { - "@id":"#process/nucleic_acid_extraction/332", - "name":"", - "executesProtocol":{ - "@id":"#protocol/19_18" + "@id": "#process/nucleic_acid_extraction/332", + "name": "", + "executesProtocol": { + "@id": "#protocol/19_18" }, - "parameterValues":[ - - ], - "performer":"", 
- "date":"", - "previousProcess":{ - "@id":"#process/sample_collection/332" + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/332" }, - "nextProcess":{ - "@id":"#process/library_construction/332" + "nextProcess": { + "@id": "#process/library_construction/332" }, - "inputs":[ + "inputs": [ { - "@id":"#sample/331" + "@id": "#sample/331" } ], - "outputs":[ + "outputs": [ { - "@id":"#other_material/332" + "@id": "#other_material/332" } ] }, { - "@id":"#process/library_construction/333", - "name":"", - "executesProtocol":{ - "@id":"#protocol/20_20" + "@id": "#process/library_construction/333", + "name": "", + "executesProtocol": { + "@id": "#protocol/20_20" }, - "parameterValues":[ + "parameterValues": [ { - "category":{ - "@id":"#parameter/349" + "category": { + "@id": "#parameter/349" }, - "value":{ - "annotationValue":"lib prep", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "lib prep", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#parameter/351" + "category": { + "@id": "#parameter/351" }, - "value":{ - "annotationValue":"Test", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "Test", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#parameter/352" + "category": { + "@id": "#parameter/352" }, - "value":{ - "annotationValue":"OTHER", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, 
{ - "category":{ - "@id":"#parameter/353" + "category": { + "@id": "#parameter/353" }, - "value":{ - "annotationValue":"OTHER", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#parameter/354" + "category": { + "@id": "#parameter/354" }, - "value":{ - "annotationValue":"RT-PCR", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "RT-PCR", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#parameter/355" + "category": { + "@id": "#parameter/355" }, - "value":{ - "annotationValue":"SINGLE", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } }, { - "category":{ - "@id":"#parameter/356" + "category": { + "@id": "#parameter/356" }, - "value":{ - "annotationValue":"100", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": "100", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } } ], - "performer":"", - "date":"", - "previousProcess":{ - "@id":"#process/nucleic_acid_extraction/333" + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/nucleic_acid_extraction/333" }, - "nextProcess":{ - "@id":"#process/nucleic_acid_sequencing/333" + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/333" }, - "inputs":[ + "inputs": [ { - 
"@id":"#other_material/332" + "@id": "#other_material/332" } ], - "outputs":[ + "outputs": [ { - "@id":"#other_material/333" + "@id": "#other_material/333" } ] }, { - "@id":"#process/nucleic_acid_sequencing/334", - "name":"", - "executesProtocol":{ - "@id":"#protocol/21_21" + "@id": "#process/nucleic_acid_sequencing/334", + "name": "", + "executesProtocol": { + "@id": "#protocol/21_21" }, - "parameterValues":[ + "parameterValues": [ { - "category":{ - "@id":"#parameter/363" + "category": { + "@id": "#parameter/363" }, - "value":{ - "annotationValue":" MinION", - "termSource":"", - "termAccession":"" + "value": { + "annotationValue": " MinION", + "termSource": "", + "termAccession": "" }, - "unit":{ - "termSource":"", - "termAccession":"", - "comments":[ - - ] + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] } } ], - "performer":"", - "date":"", - "previousProcess":{ - "@id":"#process/library_construction/334" - }, - "nextProcess":{ - + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/334" }, - "inputs":[ + "nextProcess": {}, + "inputs": [ { - "@id":"#other_material/333" + "@id": "#other_material/333" } ], - "outputs":[ + "outputs": [ { - "@id":"#data_file/334" + "@id": "#data_file/334" } ] } ], - "dataFiles":[ + "dataFiles": [ { - "@id":"#data/334", - "name":"fake2.bam", - "type":"Raw Data File", - "comments":[ + "@id": "#data/334", + "name": "fake2.bam", + "type": "Raw Data File", + "comments": [ { - "name":"file type", - "value":"bam" + "name": "file type", + "value": "bam" }, { - "name":"file checksum", - "value":"9840f585055afc37de353706fd31a377" + "name": "file checksum", + "value": "9840f585055afc37de353706fd31a377" }, { - "name":"submission date", - "value":"" + "name": "submission date", + "value": "" }, { - "name":"status", - "value":"" + "name": "status", + "value": "" }, { - "name":"accession", - "value":"" + "name": "accession", + "value": "" } ] } ], - "unitCategories":[ - - ] + 
"unitCategories": [] } ], - "factors":[ - - ], - "unitCategories":[ - - ] + "factors": [], + "unitCategories": [] } ] } -} +} \ No newline at end of file From cc88350ec5ad7eefa00e26ea2b7b61d054dbbc95 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 16:27:15 +0200 Subject: [PATCH 35/45] move model to mars_lib --- mars-cli/mars_lib/isa_json.py | 5 ++--- mars-cli/mars_lib/{schemas => }/model.py | 0 mars-cli/mars_lib/schemas/__init__.py | 0 mars-cli/tests/test_isa_json.py | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) rename mars-cli/mars_lib/{schemas => }/model.py (100%) delete mode 100644 mars-cli/mars_lib/schemas/__init__.py diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index 6dfdb74..e4e9bbe 100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -1,6 +1,6 @@ import json from typing import Dict, Union, List -from mars_lib.schemas.model import Investigation, Assay, Comment +from mars_lib.model import Investigation, Assay, Comment from pydantic import ValidationError from mars_lib.target_repo import TARGET_REPO_KEY @@ -79,6 +79,5 @@ def load_isa_json(file_path: str) -> Union[Dict[str, str], ValidationError]: with open(file_path, "r") as json_file: isa_json = json.load(json_file) - # TODO: Once we have an idea on what / how to validate, it should be added here - + # Validation of the ISA JSON return Investigation.model_validate(isa_json) diff --git a/mars-cli/mars_lib/schemas/model.py b/mars-cli/mars_lib/model.py similarity index 100% rename from mars-cli/mars_lib/schemas/model.py rename to mars-cli/mars_lib/model.py diff --git a/mars-cli/mars_lib/schemas/__init__.py b/mars-cli/mars_lib/schemas/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index a45a55e..15210aa 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -5,7 +5,7 @@ from mars_lib.target_repo 
import TargetRepository, TARGET_REPO_KEY import pytest from pydantic import ValidationError -from mars_lib.schemas.model import Data, Material, Assay, Person +from mars_lib.model import Data, Material, Assay, Person def test_load_isa_json(): From e97220ad84c0de2dea78e10f4f9b42819b37d308 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 16:40:16 +0200 Subject: [PATCH 36/45] Add validation command to CLI --- mars-cli/mars_cli.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index cc5193d..8c4ce4c 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -3,10 +3,12 @@ import pathlib from configparser import ConfigParser from mars_lib.target_repo import TargetRepository +from mars_lib.model import Investigation, IsaJson from logging.handlers import RotatingFileHandler import requests import sys import os +import json # Load CLI configuration home_dir = ( @@ -155,5 +157,30 @@ def health_check(ctx): print_and_log(f"Biosamples ({biosamples_url}) is healthy.") +@cli.command() +@click.argument( + "isa_json_file", + type=click.Path(exists=True), +) +@click.option( + "--investigation-is-root", + default=False, + type=click.BOOL, + help="Boolean indicating if the investigation is the root of the ISA JSON. 
Set this to True if the ISA-JSON does not contain a 'investigation' field.", +) +def validate_isa_json(isa_json_file, investigation_is_root): + """Validate the ISA JSON file.""" + print_and_log(f"Validating {isa_json_file}.") + + with open(isa_json_file) as f: + json_data = json.load(f) + + if investigation_is_root: + investigation = Investigation.model_validate(json_data) + else: + investigation = IsaJson.model_validate(json_data).investigation + + print_and_log(f"ISA JSON with investigation '{investigation.title}' is valid.") + if __name__ == "__main__": cli() From 48cd972c066c7b25c42742577b7c2756eb82b3be Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 16:46:20 +0200 Subject: [PATCH 37/45] Remove phone nr validator --- mars-cli/mars_lib/model.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mars-cli/mars_lib/model.py b/mars-cli/mars_lib/model.py index 45951b1..133dc95 100644 --- a/mars-cli/mars_lib/model.py +++ b/mars-cli/mars_lib/model.py @@ -233,12 +233,6 @@ class Person(IsaBase): phone: Optional[str] = Field(default=None) roles: Optional[List[OntologyAnnotation]] = Field(default=[]) - @field_validator("phone", "fax") - def check_numbers(cls, v): - if not (re.match(r"^\+\d{1,3}\d{4,}$", v) or v == ""): - raise ValueError("Invalid number format") - return v - class Publication(IsaBase): authorList: Optional[str] = Field(default=None) From 52445f0669b3c81c5b3f1961a43b1b9ac3ba2977 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Fri, 19 Apr 2024 17:09:59 +0200 Subject: [PATCH 38/45] Fix for ARC --- mars-cli/mars_lib/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mars-cli/mars_lib/model.py b/mars-cli/mars_lib/model.py index 133dc95..d798663 100644 --- a/mars-cli/mars_lib/model.py +++ b/mars-cli/mars_lib/model.py @@ -64,7 +64,7 @@ class OntologyAnnotation(IsaBase): class MaterialAttributeValue(IsaBase): id: Optional[str] = Field(alias="@id", default=None) - category: Optional[OntologyAnnotation] 
= Field(default=None) + category: Optional[MaterialAttribute] = Field(default=None) value: Union[ Optional[OntologyAnnotation], Optional[str], Optional[float], Optional[int] ] = Field(default=None) @@ -146,6 +146,7 @@ class Protocol(IsaBase): class MaterialTypeEnum(str, Enum): EXTRACT_NAME = "Extract Name" LABELED_EXTRACT_NAME = "Labeled Extract Name" + LIBRARY_NAME = "library name" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-schema-json) but was found in DataHub ISA-JSON and ARC ISA-JSON. class Material(IsaBase): From e57ce823b56f092c091d0e2dbc4464f251afc221 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 22 Apr 2024 15:51:24 +0200 Subject: [PATCH 39/45] Add option for ISA JSON's when investigation is root --- mars-cli/mars_cli.py | 21 ++++++++++++++++++++- mars-cli/mars_lib/isa_json.py | 15 ++++++++++----- mars-cli/mars_lib/model.py | 3 +-- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 8c4ce4c..41e4c06 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -4,6 +4,7 @@ from configparser import ConfigParser from mars_lib.target_repo import TargetRepository from mars_lib.model import Investigation, IsaJson +from mars_lib.isa_json import load_isa_json from logging.handlers import RotatingFileHandler import requests import sys @@ -93,9 +94,26 @@ def cli(ctx, development): default=True, help="Submit to Metabolights.", ) -def submit(credentials_file, isa_json_file, submit_to_ena, submit_to_metabolights): +@click.option( + "--investigation-is-root", + default=False, + type=click.BOOL, + help="Boolean indicating if the investigation is the root of the ISA JSON. 
Set this to True if the ISA-JSON does not contain a 'investigation' field.", +) +def submit( + credentials_file, + isa_json_file, + submit_to_ena, + submit_to_metabolights, + investigation_is_root, +): """Start a submission to the target repositories.""" target_repositories = ["biosamples"] + + investigation = load_isa_json(isa_json_file, investigation_is_root) + + print_and_log(f"ISA JSON with investigation '{investigation.title}' is valid.") + if submit_to_ena: target_repositories.append(TargetRepository.ENA) @@ -182,5 +200,6 @@ def validate_isa_json(isa_json_file, investigation_is_root): print_and_log(f"ISA JSON with investigation '{investigation.title}' is valid.") + if __name__ == "__main__": cli() diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index e4e9bbe..af53897 100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -1,6 +1,6 @@ import json -from typing import Dict, Union, List -from mars_lib.model import Investigation, Assay, Comment +from typing import Union, List +from mars_lib.model import Investigation, Assay, Comment, IsaJson from pydantic import ValidationError from mars_lib.target_repo import TARGET_REPO_KEY @@ -66,12 +66,15 @@ def is_assay_for_target_repo(assay: Assay, target_repo: str) -> bool: return False -def load_isa_json(file_path: str) -> Union[Dict[str, str], ValidationError]: +def load_isa_json( + file_path: str, investigation_is_root: bool +) -> Union[Investigation, ValidationError]: """ Reads the file and validates it as a valid ISA JSON. Args: file_path (str): Path to ISA JSON as string. + investigation_is_root (bool): Boolean indicating if the investigation is the root of the ISA JSON. Set this to True if the ISA-JSON does not contain a 'investigation' field. Returns: Union[Dict[str, str], ValidationError]: Depending on the validation, returns a filtered ISA JSON or a pydantic validation error. 
@@ -79,5 +82,7 @@ def load_isa_json(file_path: str) -> Union[Dict[str, str], ValidationError]: with open(file_path, "r") as json_file: isa_json = json.load(json_file) - # Validation of the ISA JSON - return Investigation.model_validate(isa_json) + if investigation_is_root: + return Investigation.model_validate(isa_json) + else: + return IsaJson.model_validate(isa_json).investigation diff --git a/mars-cli/mars_lib/model.py b/mars-cli/mars_lib/model.py index d798663..9149224 100644 --- a/mars-cli/mars_lib/model.py +++ b/mars-cli/mars_lib/model.py @@ -1,7 +1,6 @@ from __future__ import annotations from enum import Enum -import re from typing import List, Optional, Union from pydantic import BaseModel, Field, field_validator, ConfigDict @@ -146,7 +145,7 @@ class Protocol(IsaBase): class MaterialTypeEnum(str, Enum): EXTRACT_NAME = "Extract Name" LABELED_EXTRACT_NAME = "Labeled Extract Name" - LIBRARY_NAME = "library name" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-schema-json) but was found in DataHub ISA-JSON and ARC ISA-JSON. + LIBRARY_NAME = "library name" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-schema-json) but was found in DataHub ISA-JSON and ARC ISA-JSON. 
class Material(IsaBase): From eb5fb0b5c2c8227cebb95a3edf30790e7300e4dd Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 22 Apr 2024 15:51:56 +0200 Subject: [PATCH 40/45] Update tests --- .../tests/fixtures/invalid_investigation.json | 12 +++++++++++ .../fixtures/minimal_valid_investigation.json | 12 +++++++++++ mars-cli/tests/test_isa_json.py | 21 +++++++++++++------ 3 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 mars-cli/tests/fixtures/invalid_investigation.json create mode 100644 mars-cli/tests/fixtures/minimal_valid_investigation.json diff --git a/mars-cli/tests/fixtures/invalid_investigation.json b/mars-cli/tests/fixtures/invalid_investigation.json new file mode 100644 index 0000000..db9f7dd --- /dev/null +++ b/mars-cli/tests/fixtures/invalid_investigation.json @@ -0,0 +1,12 @@ +{ + "comments": [], + "description": "This is a minimal test investigation for testing purposes", + "identifier": "INV test 001", + "ontologySourceReferences": [], + "people": [], + "publicReleaseDate": "", + "publications": [], + "studies": "study 1", + "submissionDate": "", + "title": "test investigation 001" +} \ No newline at end of file diff --git a/mars-cli/tests/fixtures/minimal_valid_investigation.json b/mars-cli/tests/fixtures/minimal_valid_investigation.json new file mode 100644 index 0000000..9f04b19 --- /dev/null +++ b/mars-cli/tests/fixtures/minimal_valid_investigation.json @@ -0,0 +1,12 @@ +{ + "comments": [], + "description": "This is a minimal test investigation for testing purposes", + "identifier": "INV test 001", + "ontologySourceReferences": [], + "people": [], + "publicReleaseDate": "", + "publications": [], + "studies": [], + "submissionDate": "", + "title": "test investigation 001" +} \ No newline at end of file diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index 15210aa..730629c 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -9,17 +9,26 @@ def 
test_load_isa_json(): - # Should test the validation process - valid_isa_json = load_isa_json("../test-data/ISA-BH2023-ALL/isa-bh2023-all.json") - assert len(valid_isa_json.studies) == 1 - assert valid_isa_json.studies[0].identifier == "BH2023" + # Should test the validation process of the ISA JSON file where root level = investigation. + valid_isa_json01 = load_isa_json( + "../test-data/ISA-BH2023-ALL/isa-bh2023-all.json", True + ) + assert len(valid_isa_json01.studies) == 1 + assert valid_isa_json01.studies[0].identifier == "BH2023" + + # Should test the validation process of the ISA JSON file where root has 'investigation' as key. + valid_isa_json02 = load_isa_json("../test-data/biosamples-input-isa.json", False) + assert len(valid_isa_json02.studies) == 1 + assert valid_isa_json02.studies[0].title == "Arabidopsis thaliana" with pytest.raises(ValidationError): - load_isa_json("./tests/fixtures/invalid_investigation.json") + load_isa_json("./tests/fixtures/invalid_investigation.json", True) def test_reduce_isa_json_for_target_repo(): - good_isa_json = load_isa_json("../test-data/ISA-BH2023-ALL/isa-bh2023-all.json") + good_isa_json = load_isa_json( + "../test-data/ISA-BH2023-ALL/isa-bh2023-all.json", True + ) filtered_isa_json = reduce_isa_json_for_target_repo( good_isa_json, TargetRepository.ENA From d2238c07a01f63a1256ceccb635ac18923619222 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 22 Apr 2024 15:52:18 +0200 Subject: [PATCH 41/45] Update README --- mars-cli/README.md | 156 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 1 deletion(-) diff --git a/mars-cli/README.md b/mars-cli/README.md index 8163409..e8c55ff 100644 --- a/mars-cli/README.md +++ b/mars-cli/README.md @@ -1,9 +1,11 @@ # Installing the mars-cli +This installation procedure describes a typical Linux installation. This application can perfectly work on Windows and MacOS but some of the steps might be different. 
+ Installing the mars-cli from source: ```sh -cd mars-cli # Assuming you are in the root folder +cd mars-cli # Assuming you are in the root folder of this project pip install . ``` @@ -18,6 +20,20 @@ If you want to overwrite the `settings.ini` file when reinstalling, you need to ```sh OVERWRITE_SETTINGS=True pip install .[test] ``` +Installing the MARS-cli will, by default, create a `.mars` directory in the home directory to store settings and log files. +If you wish to create the `.mars` directory in another place, you must specify the `MARS_SETTINGS_DIR` variable and set it to the desired path: + +```sh +export MARS_SETTINGS_DIR= +``` + +If you want to make it permanent, you can run the following commands in the terminal. +Note: replace `.bashrc` by the config file of your shell. + +```sh +echo '# Add MARS setting directory to PATH' >> $HOME/.bashrc +echo 'export MARS_SETTINGS_DIR=' >> $HOME/.bashrc +``` Once installed, the CLI application will be available from the terminal. @@ -51,6 +67,144 @@ __log_max_size__: The maximum size in kB for the log file. By default the maximu __log_max_files__: The maximum number of old log files to keep. By default, this is set to 5 +## Target respoistory settings + +Each of the target repositories have a set of settings: + +- development-url: URL to the development server when performing a health-check +- development-submission-url: URL to the development server when performing a submission +- production-url: URL to the production server when performing a health-check +- production-submission-url: URL to the production server when performing a submission + +# Using the MARS-CLI + +If you wish to use a different location for the `.mars` folder: + +```sh +export MARS_SETTINGS_DIR= +mars-cli [options] ARGUMENT +``` + +## Help + +The mars-cli's help text can be found from the command line as such: + +```sh +mars-cli --help +``` + +Output: + +``` +➜ mars-cli --help +Usage: mars-cli [OPTIONS] COMMAND [ARGS]... 
+ +Options: + -d, --development Boolean indicating the usage of the development + environment of the target repositories. If not present, + the production instances will be used. + --help Show this message and exit. + +Commands: + health-check Check the health of the target repositories. + submit Start a submission to the target repositories. + validate-isa-json Validate the ISA JSON file. +``` + +or for a specific command: + +```sh +mars-cli submit --help +``` + +Output: + +``` +➜ mars-cli submit --help +############# Welcome to the MARS CLI. ############# +Running in Production environment +Usage: mars-cli submit [OPTIONS] CREDENTIALS_FILE ISA_JSON_FILE + + Start a submission to the target repositories. + +Options: + --submit-to-ena BOOLEAN Submit to ENA. + --submit-to-metabolights BOOLEAN + Submit to Metabolights. + --investigation-is-root BOOLEAN + Boolean indicating if the investigation is + the root of the ISA JSON. Set this to True + if the ISA-JSON does not contain a + 'investigation' field. + --help Show this message and exit. +``` + +## Development + +By default the mars-CLI will try to submit the ISA-JSON's metadata towards the repositories' production servers. Passing the development flag will run it in development mode and substitute the production servers with the development servers. + +## Health check repository services + +You can check whether the supported repositories are healthy, prior to submission, by doing a health-check. + +```sh +mars-cli health-check +``` + +Output: + +``` +➜ mars-cli health-check +############# Welcome to the MARS CLI. ############# +Running in Production environment +Checking the health of the target repositories. +Checking production instances. +Webin (https://www.ebi.ac.uk/ena/submit/webin/auth) is healthy. +ENA (https://www.ebi.ac.uk/ena/submit/webin-v2/) is healthy. +Biosamples (https://www.ebi.ac.uk/biosamples/samples/) is healthy. 
+``` + +## Submitting to repository services + +TODO + +### Options + +- `--submit-to-ena`: By default set to `True`. Will try to submit ISA-JSON metadata towards ENA. Setting it to `False` will skip sending the ISA-JSON's metadata to ENA. + +```sh +mars-cli submit --submit-to-ena False my-credentials my-isa-json.json +``` + +- `--submit-to-metabolights`: By default set to `True`. Will try to submit ISA-JSON metadata towards Metabolights. Setting it to `False` will skip sending the ISA-JSON's metadata to Metabolights. + +```sh +mars-cli submit --submit-to-metabolights False my-credentials my-isa-json.json +``` + +`--investigation-is-root`: By default this flag is set to false, meaning the ISA-JSON should have the `investigation` key at the root level. In case the root level __IS__ the investigation (`investigation` level is omitted), you need to set the flag `--investigation-is-root` to `True` in order to validate the ISA-JSON. + +```sh +mars-cli submit --investigation-is-root True my-credentials my-isa-json.json +``` + +## Validation of the ISA JSON + +You can perform a syntactic validation of the ISA-JSON, without submitting to the target repositories. + +__Note:__ This does not include the validation performed on the repository's side, so it does not guarantee a successful submission. + +```sh +mars-cli validate-isa-json ../test-data/biosamples-input-isa.json +``` + +### Options + +`--investigation-is-root`: By default this flag is set to false, meaning the ISA-JSON should have the `investigation` key at the root level. In case the root level __IS__ the investigation (`investigation` level is omitted), you need to set the flag `--investigation-is-root` to `True` in order to validate the ISA-JSON. + +```sh +mars-cli validate-isa-json --investigation-is-root True my-isa-investigation.json +``` # Extending BioSamples' records The Python script ``biosamples-externalReferences.py`` defines a class BiosamplesRecord for managing biosample records. 
This class is designed to interact with the BioSamples database, allowing operations like fetching, updating, and extending biosample records. From b98759351629382b7df71cf79c9619a62935c237 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 22 Apr 2024 16:00:49 +0200 Subject: [PATCH 42/45] Fix failing tests --- mars-cli/.coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mars-cli/.coveragerc b/mars-cli/.coveragerc index a0ce032..2c3ba72 100644 --- a/mars-cli/.coveragerc +++ b/mars-cli/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = mars_lib/**__init__.py, mars_lib/submit.py, mars_lib/credential.py +omit = mars_lib/__init__.py, mars_lib/submit.py, mars_lib/credential.py From cd6a77ef97a83076ae45f33f373ae3479b00af8e Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer Date: Mon, 22 Apr 2024 16:02:45 +0200 Subject: [PATCH 43/45] Add pydantic to requirements --- mars-cli/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mars-cli/requirements.txt b/mars-cli/requirements.txt index bd4c238..ec5fd76 100644 --- a/mars-cli/requirements.txt +++ b/mars-cli/requirements.txt @@ -1,3 +1,4 @@ requests jsonschema -keyring \ No newline at end of file +keyring +pydantic \ No newline at end of file From 919fc1dc856bcf81130ab46e2c9a85da1c2c9f43 Mon Sep 17 00:00:00 2001 From: Kevin De Pelseneer <82407142+kdp-cloud@users.noreply.github.com> Date: Wed, 24 Apr 2024 13:36:21 +0200 Subject: [PATCH 44/45] Fix typo Co-authored-by: April Shen --- mars-cli/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mars-cli/README.md b/mars-cli/README.md index e8c55ff..1b079cf 100644 --- a/mars-cli/README.md +++ b/mars-cli/README.md @@ -67,7 +67,7 @@ __log_max_size__: The maximum size in kB for the log file. By default the maximu __log_max_files__: The maximum number of old log files to keep. 
By default, this is set to 5 -## Target respoistory settings +## Target repository settings Each of the target repositories have a set of settings: From f2861a81e1942fa4d6a8389a007d4ca751317f70 Mon Sep 17 00:00:00 2001 From: bedroesb Date: Tue, 7 May 2024 11:28:39 +0200 Subject: [PATCH 45/45] add l click --- mars-cli/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mars-cli/requirements.txt b/mars-cli/requirements.txt index ec5fd76..b2f2f14 100644 --- a/mars-cli/requirements.txt +++ b/mars-cli/requirements.txt @@ -1,4 +1,5 @@ requests jsonschema keyring -pydantic \ No newline at end of file +pydantic +click