diff --git a/README.md b/README.md index 44a516e..ed5804d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ :warning: **Package under developing**: Can be subjected to any critic change! # OpenVariant -![PyPI](https://img.shields.io/pypi/v/open-variant) +[![License](https://img.shields.io/github/license/bbglab/openvariant)](https://opensource.org/licenses/BSD-3-Clause) +[![PyPI](https://img.shields.io/pypi/v/open-variant)](https://pypi.org/project/open-variant/) + ## Install diff --git a/annotation_example.yaml b/annotation_example.yaml index 9fcef5e..7681562 100644 --- a/annotation_example.yaml +++ b/annotation_example.yaml @@ -42,10 +42,6 @@ annotation: function: 'lambda x: "{}".format(x.lower())' regex: '(*.)' - - type: 'plugin' - plugin: 'alteration_type' - field: 'ALT_TYPE' - - type: 'mapping' field: 'MUTATION_REF' fieldSource: @@ -56,6 +52,11 @@ annotation: fileMapping: 'metadata_mutation.tsv' fieldValue: 'REFERENCE' + - type: 'plugin' + plugin: 'alteration_type' + field: 'ALT_TYPE' + + exclude: - field: 'DATASET' value: 'laml' diff --git a/annotation_template.yaml b/annotation_template.yaml index b9408b0..9a53d36 100644 --- a/annotation_template.yaml +++ b/annotation_template.yaml @@ -54,7 +54,7 @@ annotation: # Columns to parse fileMapping: string # File name to make the mapping; required fieldValue: string # Field of the final output value; required - # Apply plugin in the columns described on the `fieldSource` attribute of input files. + # Apply plugin transformation in each row of the input file. - type: 'plugin' plugin: string # Plugin to apply, could be internal, located into 'plugin' folder, or customized by the user field: string diff --git a/main.py b/main.py index 5ad5876..07b10df 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,23 @@ -import os +from os import getcwd -from openvariant.commands.tasks.groupby import group_by +from openvariant import find_files, Annotation, Variant -for g, v, _ in group_by(f'{os.getcwd()}/tests/data/dataset/', f'{os.getcwd()}/tests/data/task_test.yaml', - None, key_by='DATASET', where="PROJECT >= \"SAMPLE1\"", quite=True): - print(g, len(v)) +# where = "VAR != 4 AND (VAR != 5 OR VAR != 10)" +# where_clauses = parse_where(where) +# print(where_clauses) +# print(skip({"VAR": 4}, where_clauses)) + +# print(and_connector("VAR != 4 ", "VAR != 5")) + +#res = count(f'{getcwd()}/tests/data/dataset/', f'{getcwd()}/tests/data/task_test.yaml', +# where="DATASET != 'acc'", quite=True) +#print(res) + + +#for file, annotation in find_files(f"{getcwd()}/tests/data/dataset/"): + +file = "./indexes.tsv" +annotation = Annotation("./metadata.yaml") +result = Variant(file, annotation) +for line in result.read(): + print(f"Line in a dict: {line}") diff --git a/openvariant/__init__.py b/openvariant/__init__.py index a80b1c7..2eac563 100644 --- a/openvariant/__init__.py +++ b/openvariant/__init__.py @@ -1,4 +1,12 @@ import pkg_resources + +from openvariant.annotation.annotation import Annotation +from openvariant.tasks import cat, count, group_by +from openvariant.variant import Variant +from openvariant.find_files import find_files + version = pkg_resources.require("open-variant")[0].version __version__ = version +__all__ = ['Annotation', 'Variant', 'cat', 'count', 'group_by', 'find_files'] + diff --git a/openvariant/annotation/annotation.py b/openvariant/annotation/annotation.py index 8737df0..b998433 100644 --- a/openvariant/annotation/annotation.py +++ b/openvariant/annotation/annotation.py @@ -125,6 +125,12 @@ def _read_annotation_file(self) -> dict: logging.error(exc) stream.close() + def _check_columns(self) -> None: + """Check if columns exists as annotation fields""" + for col in self._columns: + if col not in self._annotations: + raise KeyError(f"'{col}' column unable to find.") + def __init__(self, annotation_path: str) -> None: """ Inits Annotation with annotation file path. @@ -137,6 +143,7 @@ def __init__(self, annotation_path: str) -> None: self._path = annotation_path raw_annotation = self._read_annotation_file() + _check_general_keys(raw_annotation) for annot in raw_annotation.get(AnnotationGeneralKeys.ANNOTATION.value, []): _check_annotation_keys(annot) @@ -145,11 +152,9 @@ def __init__(self, annotation_path: str) -> None: self._patterns = patterns if isinstance(patterns, List) else [patterns] self._recursive = raw_annotation.get(AnnotationGeneralKeys.RECURSIVE.value, True) self._delimiter = raw_annotation.get(AnnotationGeneralKeys.DELIMITER.value, DEFAULT_DELIMITER).upper() - self._format = raw_annotation.get(AnnotationGeneralKeys.FORMAT.value, DEFAULT_FORMAT).replace('.', '') self._excludes: dict = {} - for k in raw_annotation.get(AnnotationGeneralKeys.EXCLUDE.value, []): key_exclude = k[AnnotationKeys.FIELD.value] value_exclude = k[AnnotationKeys.VALUE.value] @@ -167,12 +172,6 @@ def __init__(self, annotation_path: str) -> None: self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, list(self.annotations.keys())) self._check_columns() - def _check_columns(self) -> None: - """Check if columns exists as annotation fields""" - for col in self._columns: - if col not in self._annotations: - raise KeyError(f"'{col}' column unable to find.") - @property def path(self) -> str: """str: path where annotation file is located""" diff --git a/openvariant/annotation/builder.py b/openvariant/annotation/builder.py index fc40928..e98e30f 100644 --- a/openvariant/annotation/builder.py +++ b/openvariant/annotation/builder.py @@ -113,7 +113,7 @@ def _internal_builder(x: dict, base_path: str = None) -> InternalBuilder: return AnnotationTypes.INTERNAL.name, x[AnnotationKeys.FIELD_SOURCE.value], Builder("(lambda y: y)") \ if AnnotationKeys.FUNCTION.value not in x or x[AnnotationKeys.FUNCTION.value] is None or \ - len(x[AnnotationKeys.FUNCTION.value]) == 2 else Builder(x[AnnotationKeys.FUNCTION.value]), value + len(x[AnnotationKeys.FUNCTION.value]) == 2 else Builder(x[AnnotationKeys.FUNCTION.value]), value def _dirname_builder(x: dict, base_path: str = None) -> DirnameBuilder: diff --git a/openvariant/annotation/process.py b/openvariant/annotation/process.py index 7abd597..a2ea628 100644 --- a/openvariant/annotation/process.py +++ b/openvariant/annotation/process.py @@ -95,7 +95,7 @@ def _filename_process(x: FilenameBuilder, original_header: List = [] or None, fi """ try: if isdir(file_path): - raise FileNotFoundError('Unable to find a filename') + raise FileNotFoundError('Unable to find_files a filename') func_result = x[1](basename(file_path)) value = x[2].findall(func_result)[0] @@ -131,7 +131,7 @@ def _dirname_process(x: DirnameBuilder, original_header: List = [] or None, file """ try: if isdir(file_path): - raise FileNotFoundError('Unable to find a dirname') + raise FileNotFoundError('Unable to find_files a dirname') func_result = x[1](basename(dirname(abspath(file_path)))) value = x[2].findall(func_result)[0] @@ -179,7 +179,7 @@ def _mapping_process(x: MappingBuilder, original_header: List = [] or None, file if value is None: raise KeyError(f'Unable to map {x[1]} sources on mapping annotation') """ - return AnnotationTypes.MAPPING.name, x, str #value if value is not None else float('nan'), str + return AnnotationTypes.MAPPING.name, x, str def _plugin_process(x: PluginBuilder, original_header: List = [] or None, file_path: str = None, diff --git a/openvariant/commands/openvar.py b/openvariant/commands/openvar.py index b266cf5..cb998d5 100644 --- a/openvariant/commands/openvar.py +++ b/openvariant/commands/openvar.py @@ -16,85 +16,97 @@ def openvar(): pass -@openvar.command(name="cat", short_help='Concatenate files to standard input') +@openvar.command(name="cat", short_help='Concatenate parsed files to standard output.') @click.argument('input_path', type=click.Path(exists=True), default='.') -@click.option('--where', '-w', type=click.STRING, default=None, help="Filter expression. eg: CHROMOSOME == 4") -@click.option('--annotations', '-a', type=click.Path(exists=True), default=None) -@click.option('--header', help="Show the result header", is_flag=True) -@click.option('--output', '-o', help="File to write the output.", default=None) +@click.option('--where', '-w', type=click.STRING, default=None, help="Condition expression. eg: CHROMOSOME == 4") +@click.option('--annotations', '-a', type=click.Path(exists=True), default=None, + help="Annotation path. eg: /path/annotation.yaml") +@click.option('--header', is_flag=True, help="Show the result header.") +@click.option('--output', '-o', default=None, help="File to write the output.") def cat(input_path: str, where: str or None, annotations: str or None, header: bool, output: str or None): """Print the parsed files on the stdout/"output".""" cat_task(input_path, annotations, where, header, output) -@openvar.command(name="count", short_help='Number of rows that matches a specified criterion') +@openvar.command(name="count", short_help='Number of rows that matches a specified criterion.') @click.argument('input_path', type=click.Path(exists=True), default='.') -@click.option('--where', '-w', multiple=False, type=click.STRING, help="Filter expression. eg: CHROMOSOME == 4") -@click.option('--group_by', '-g', type=click.STRING, help="Filter expression. eg: CHROMOSOME") -@click.option('--annotations', '-a', default=None, type=click.Path(exists=True)) -@click.option('--cores', '-c', help='Maximum processes to run in parallel.', type=click.INT, default=cpu_count()) -@click.option('--quite', '-q', help="Don't show the progress, only the total count.", is_flag=True) -@click.option('--output', '-o', help="File to write the output.", default=None) -def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None, output:str or None) -> None: +@click.option('--where', '-w', multiple=False, type=click.STRING, help="Condition expression. eg: CHROMOSOME == 4") +@click.option('--group_by', '-g', type=click.STRING, help="Key to group rows. eg: COUNTRY") +@click.option('--annotations', '-a', default=None, type=click.Path(exists=True), + help="Annotation path. eg: /path/annotation.yaml") +@click.option('--cores', '-c', type=click.INT, default=cpu_count(), help='Maximum processes to run in parallel.') +@click.option('--quite', '-q', is_flag=True, help="Don't show the progress.") +@click.option('--output', '-o', default=None, help="File to write the output.") +def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None, + output: str or None) -> None: """Print on the stdout/"output" the number of rows that meets the criteria.""" result = count_task(input_path, annotations, group_by=group_by, where=where, cores=cores, quite=quite) + out_file = None if output: out_file = open(output, "w") if len(result[1]) > 0: for k, v in sorted(result[1].items(), key=lambda res: res[1]): if output: out_file.write("{}\t{}\n".format(k, v)) - else: print("{}\t{}".format(k, v)) - + else: + print("{}\t{}".format(k, v)) + if output: out_file.write("TOTAL\t{}\n".format(result[0])) - else: print("TOTAL\t{}".format(result[0])) - - if output: out_file.close() - + else: + print("TOTAL\t{}".format(result[0])) + + if output: + out_file.close() -@openvar.command(name="groupby", short_help='Groups rows that have the same values into summary rows') +@openvar.command(name="groupby", short_help='Group the parsed result for each different value of the specified key.') @click.argument('input_path', type=click.Path(exists=True), default='.') -@click.option('--header', help='Send header as first row', is_flag=True) -@click.option('--show', help='Show group by each row', is_flag=True) -@click.option('--group_by', '-g', type=click.STRING, default=None, help="Filter expression. eg: CHROMOSOME") +@click.option('--header', is_flag=True, help="Show the result header.") +@click.option('--show', is_flag=True, help='Show group by each row.') @click.option('--where', '-w', type=click.STRING, default=None, help="Filter expression. eg: CHROMOSOME == 4") +@click.option('--group_by', '-g', type=click.STRING, default=None, help="Key to group rows. eg: COUNTRY") @click.option('--script', '-s', type=click.STRING, default=None, help="Filter expression. eg: gzip > \${GROUP_KEY}.parsed.tsv.gz") -@click.option('--annotations', '-a', default=None, type=click.Path(exists=True)) -@click.option('--cores', '-c', help='Maximum processes to run in parallel.', type=click.INT, default=cpu_count()) -@click.option('--quite', '-q', help="Don't show the progress, only the total count.", is_flag=True) +@click.option('--annotations', '-a', default=None, type=click.Path(exists=True), + help="Annotation path. eg: /path/annotation.yaml") +@click.option('--cores', '-c', type=click.INT, default=cpu_count(), help='Maximum processes to run in parallel.') +@click.option('--quite', '-q', is_flag=True, help="Don't show the progress.") @click.option('--output', '-o', help="File to write the output.", default=None) def groupby(input_path: str, script: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None, header: bool, show: bool, output: str or None): """Print on the stdout/"output" the parsed files group by a specified field.""" + out_file = None if output: out_file = open(output, 'w') for group_key, group_result, command in group_by_task(input_path, annotations, script, key_by=group_by, where=where, - cores=cores, quite=quite, header=header): + cores=cores, quite=quite, header=header): for r in group_result: if command: if output: out_file.write(f"{group_key}\t{r}\n") if show else out_file.write(f"{r}\n") - else: print(f"{group_key}\t{r}") if show else print(f"{r}") + else: + print(f"{group_key}\t{r}") if show else print(f"{r}") else: if header: if output: out_file.write(f"{r}\n") - else: print(f"{r}") + else: + print(f"{r}") header = False else: if output: out_file.write(f"{group_key}\t{r}\n") if show else out_file.write(f"{r}\n") - else: print(f"{group_key}\t{r}") if show else print(f"{r}") + else: + print(f"{group_key}\t{r}") if show else print(f"{r}") if output: out_file.close() -@openvar.command(name="plugin", short_help='Actions to execute for a plugin: create') + +@openvar.command(name="plugin", short_help='Actions to execute for a plugin: create.') @click.argument('action', type=click.Choice(['create'])) -@click.option('--name', '-n', type=click.STRING) -@click.option('--directory', '-d', type=click.STRING) +@click.option('--name', '-n', type=click.STRING, help="Name of the plugin.") +@click.option('--directory', '-d', type=click.STRING, help="Directory to reach the plugin.") def plugin(action, name: str or None, directory: str or None): """Actions to apply on the plugin system.""" PluginActions[action.upper()].value(name, directory) diff --git a/openvariant/find/__init__.py b/openvariant/find_files/__init__.py similarity index 53% rename from openvariant/find/__init__.py rename to openvariant/find_files/__init__.py index 554082b..aef5f66 100644 --- a/openvariant/find/__init__.py +++ b/openvariant/find_files/__init__.py @@ -1,5 +1,3 @@ from .find_files import find_files -__all__ = [ - 'find_files' -] +__all__ = ['find_files'] diff --git a/openvariant/find/find_files.py b/openvariant/find_files/find_files.py similarity index 89% rename from openvariant/find/find_files.py rename to openvariant/find_files/find_files.py index 538ee06..fd8b2d0 100644 --- a/openvariant/find/find_files.py +++ b/openvariant/find_files/find_files.py @@ -7,7 +7,7 @@ import re from fnmatch import fnmatch from os import listdir -from os.path import isfile, join, isdir +from os.path import isfile, join, isdir, basename, dirname from typing import Generator from openvariant.annotation.annotation import Annotation @@ -37,7 +37,11 @@ def _get_annotation(file_path, annotation): def _find_files(base_path: str, annotation: Annotation or None, fix: bool) -> Generator[str, Annotation, None]: """Recursive exploration from a base path""" if not fix: - for annotation_file in glob.iglob(join(base_path, "*.{}".format(ANNOTATION_EXTENSION))): + if isfile(base_path): + annotation_path = dirname(base_path) + else: + annotation_path = base_path + for annotation_file in glob.iglob(join(annotation_path, "*.{}".format(ANNOTATION_EXTENSION))): annotation = Annotation(annotation_file) if isdir(base_path): diff --git a/openvariant/plugins/__init__.py b/openvariant/plugins/__init__.py index 87d02ae..e7776b5 100644 --- a/openvariant/plugins/__init__.py +++ b/openvariant/plugins/__init__.py @@ -3,9 +3,4 @@ from .get_AF import Get_afPlugin, Get_afContext from .alteration_type import Alteration_typePlugin, Alteration_typeContext -__all__ = [ - 'Plugin', 'Context', - - 'Get_afPlugin', 'Get_afContext', - 'Alteration_typePlugin', 'Alteration_typeContext' -] +__all__ = ['Plugin', 'Context', 'Get_afPlugin', 'Get_afContext', 'Alteration_typePlugin', 'Alteration_typeContext'] diff --git a/openvariant/plugins/alteration_type/alteration_type.py b/openvariant/plugins/alteration_type/alteration_type.py index 7fcf5a4..4007f84 100644 --- a/openvariant/plugins/alteration_type/alteration_type.py +++ b/openvariant/plugins/alteration_type/alteration_type.py @@ -104,5 +104,5 @@ def run(self, context: Alteration_typeContext) -> str: row[context.field_name] = alt_type else: - raise ValueError("Unable to find 'REF', 'ALT' or 'POSITION' values in the row.") + raise ValueError("Unable to find_files 'REF', 'ALT' or 'POSITION' values in the row.") return row[context.field_name] diff --git a/openvariant/tasks/__init__.py b/openvariant/tasks/__init__.py index ded2e24..fd5e980 100644 --- a/openvariant/tasks/__init__.py +++ b/openvariant/tasks/__init__.py @@ -2,4 +2,4 @@ from .count import count from .groupby import group_by -__all__ = ['cat', 'count', 'group_by'] +__all__ = ['cat', 'count', 'group_by'] \ No newline at end of file diff --git a/openvariant/tasks/cat.py b/openvariant/tasks/cat.py index a692d1e..02d9a7e 100644 --- a/openvariant/tasks/cat.py +++ b/openvariant/tasks/cat.py @@ -6,7 +6,7 @@ from typing import List from openvariant.annotation.config_annotation import AnnotationFormat -from openvariant.find.find_files import find_files +from openvariant.find_files.find_files import find_files from openvariant.variant.variant import Variant @@ -32,6 +32,7 @@ def cat(base_path: str, annotation_path: str or None = None, where: str = None, header_show : bool Shows header on the output. """ + out_file = None if output: out_file = open(output, "w") for file, annotation in find_files(base_path, annotation_path): @@ -41,12 +42,15 @@ def cat(base_path: str, annotation_path: str or None = None, where: str = None, if output: out_file.write(_format_line(header, result.annotation.format)) out_file.write("\n") - else: print(_format_line(header, result.annotation.format)) + else: + print(_format_line(header, result.annotation.format)) header_show = False for i, r in enumerate(result.read(where=where)): if isinstance(r, dict): if output: out_file.write(_format_line(list(map(str, r.values())), result.annotation.format)) out_file.write("\n") - else: print(_format_line(list(map(str, r.values())), result.annotation.format)) - if output: out_file.close() + else: + print(_format_line(list(map(str, r.values())), result.annotation.format)) + if output: + out_file.close() diff --git a/openvariant/tasks/count.py b/openvariant/tasks/count.py index 2137f66..0bd3998 100644 --- a/openvariant/tasks/count.py +++ b/openvariant/tasks/count.py @@ -11,7 +11,7 @@ from tqdm import tqdm from openvariant.annotation.annotation import Annotation -from openvariant.find.find_files import find_files +from openvariant.find_files.find_files import find_files from openvariant.variant.variant import Variant diff --git a/openvariant/tasks/groupby.py b/openvariant/tasks/groupby.py index 5911ab2..bdfa9f9 100644 --- a/openvariant/tasks/groupby.py +++ b/openvariant/tasks/groupby.py @@ -13,7 +13,7 @@ from tqdm import tqdm from openvariant.annotation.annotation import Annotation -from openvariant.find.find_files import find_files +from openvariant.find_files.find_files import find_files from openvariant.variant.variant import Variant diff --git a/tests/test_find/test_find.py b/tests/test_find/test_find.py index 451b2a3..b27c901 100644 --- a/tests/test_find/test_find.py +++ b/tests/test_find/test_find.py @@ -2,7 +2,7 @@ import unittest from typing import List -from openvariant.find.find_files import find_files +from openvariant.find_files.find_files import find_files class TestFind(unittest.TestCase):