Skip to content

Commit

Permalink
feat: checked output flag
Browse files Browse the repository at this point in the history
  • Loading branch information
dmartmillan committed Jul 1, 2022
1 parent 5c22641 commit 34109a2
Show file tree
Hide file tree
Showing 18 changed files with 115 additions and 76 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
:warning: **Package under development**: It may be subject to critical changes!

# OpenVariant
![PyPI](https://img.shields.io/pypi/v/open-variant)
[![License](https://img.shields.io/github/license/bbglab/openvariant)](https://opensource.org/licenses/BSD-3-Clause)
[![PyPI](https://img.shields.io/pypi/v/open-variant)](https://pypi.org/project/open-variant/)


## Install

Expand Down
9 changes: 5 additions & 4 deletions annotation_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ annotation:
function: 'lambda x: "{}".format(x.lower())'
regex: '(*.)'

- type: 'plugin'
plugin: 'alteration_type'
field: 'ALT_TYPE'

- type: 'mapping'
field: 'MUTATION_REF'
fieldSource:
Expand All @@ -56,6 +52,11 @@ annotation:
fileMapping: 'metadata_mutation.tsv'
fieldValue: 'REFERENCE'

- type: 'plugin'
plugin: 'alteration_type'
field: 'ALT_TYPE'


exclude:
- field: 'DATASET'
value: 'laml'
Expand Down
2 changes: 1 addition & 1 deletion annotation_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ annotation: # Columns to parse
fileMapping: string # File name to make the mapping; required
fieldValue: string # Field of the final output value; required

# Apply plugin in the columns described on the `fieldSource` attribute of input files.
# Apply the plugin transformation to each row of the input file.
- type: 'plugin'
plugin: string # Plugin to apply; either an internal one (located in the 'plugin' folder) or one customized by the user
field: string
Expand Down
26 changes: 21 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,23 @@
import os
from os import getcwd

from openvariant.commands.tasks.groupby import group_by
from openvariant import find_files, Annotation, Variant

for g, v, _ in group_by(f'{os.getcwd()}/tests/data/dataset/', f'{os.getcwd()}/tests/data/task_test.yaml',
None, key_by='DATASET', where="PROJECT >= \"SAMPLE1\"", quite=True):
print(g, len(v))
# where = "VAR != 4 AND (VAR != 5 OR VAR != 10)"
# where_clauses = parse_where(where)
# print(where_clauses)
# print(skip({"VAR": 4}, where_clauses))

# print(and_connector("VAR != 4 ", "VAR != 5"))

#res = count(f'{getcwd()}/tests/data/dataset/', f'{getcwd()}/tests/data/task_test.yaml',
# where="DATASET != 'acc'", quite=True)
#print(res)


#for file, annotation in find_files(f"{getcwd()}/tests/data/dataset/"):

file = "./indexes.tsv"
annotation = Annotation("./metadata.yaml")
result = Variant(file, annotation)
for line in result.read():
print(f"Line in a dict: {line}")
8 changes: 8 additions & 0 deletions openvariant/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
import pkg_resources

from openvariant.annotation.annotation import Annotation
from openvariant.tasks import cat, count, group_by
from openvariant.variant import Variant
from openvariant.find_files import find_files

version = pkg_resources.require("open-variant")[0].version
__version__ = version

__all__ = ['Annotation', 'Variant', 'cat', 'count', 'group_by', 'find_files']

15 changes: 7 additions & 8 deletions openvariant/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ def _read_annotation_file(self) -> dict:
logging.error(exc)
stream.close()

def _check_columns(self) -> None:
"""Check if columns exists as annotation fields"""
for col in self._columns:
if col not in self._annotations:
raise KeyError(f"'{col}' column unable to find.")

def __init__(self, annotation_path: str) -> None:
"""
Inits Annotation with annotation file path.
Expand All @@ -137,6 +143,7 @@ def __init__(self, annotation_path: str) -> None:

self._path = annotation_path
raw_annotation = self._read_annotation_file()

_check_general_keys(raw_annotation)
for annot in raw_annotation.get(AnnotationGeneralKeys.ANNOTATION.value, []):
_check_annotation_keys(annot)
Expand All @@ -145,11 +152,9 @@ def __init__(self, annotation_path: str) -> None:
self._patterns = patterns if isinstance(patterns, List) else [patterns]
self._recursive = raw_annotation.get(AnnotationGeneralKeys.RECURSIVE.value, True)
self._delimiter = raw_annotation.get(AnnotationGeneralKeys.DELIMITER.value, DEFAULT_DELIMITER).upper()

self._format = raw_annotation.get(AnnotationGeneralKeys.FORMAT.value, DEFAULT_FORMAT).replace('.', '')

self._excludes: dict = {}

for k in raw_annotation.get(AnnotationGeneralKeys.EXCLUDE.value, []):
key_exclude = k[AnnotationKeys.FIELD.value]
value_exclude = k[AnnotationKeys.VALUE.value]
Expand All @@ -167,12 +172,6 @@ def __init__(self, annotation_path: str) -> None:
self._columns = raw_annotation.get(AnnotationGeneralKeys.COLUMNS.value, list(self.annotations.keys()))
self._check_columns()

def _check_columns(self) -> None:
"""Check if columns exists as annotation fields"""
for col in self._columns:
if col not in self._annotations:
raise KeyError(f"'{col}' column unable to find.")

@property
def path(self) -> str:
"""str: path where annotation file is located"""
Expand Down
2 changes: 1 addition & 1 deletion openvariant/annotation/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _internal_builder(x: dict, base_path: str = None) -> InternalBuilder:

return AnnotationTypes.INTERNAL.name, x[AnnotationKeys.FIELD_SOURCE.value], Builder("(lambda y: y)") \
if AnnotationKeys.FUNCTION.value not in x or x[AnnotationKeys.FUNCTION.value] is None or \
len(x[AnnotationKeys.FUNCTION.value]) == 2 else Builder(x[AnnotationKeys.FUNCTION.value]), value
len(x[AnnotationKeys.FUNCTION.value]) == 2 else Builder(x[AnnotationKeys.FUNCTION.value]), value


def _dirname_builder(x: dict, base_path: str = None) -> DirnameBuilder:
Expand Down
6 changes: 3 additions & 3 deletions openvariant/annotation/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _filename_process(x: FilenameBuilder, original_header: List = [] or None, fi
"""
try:
if isdir(file_path):
raise FileNotFoundError('Unable to find a filename')
raise FileNotFoundError('Unable to find_files a filename')

func_result = x[1](basename(file_path))
value = x[2].findall(func_result)[0]
Expand Down Expand Up @@ -131,7 +131,7 @@ def _dirname_process(x: DirnameBuilder, original_header: List = [] or None, file
"""
try:
if isdir(file_path):
raise FileNotFoundError('Unable to find a dirname')
raise FileNotFoundError('Unable to find_files a dirname')

func_result = x[1](basename(dirname(abspath(file_path))))
value = x[2].findall(func_result)[0]
Expand Down Expand Up @@ -179,7 +179,7 @@ def _mapping_process(x: MappingBuilder, original_header: List = [] or None, file
if value is None:
raise KeyError(f'Unable to map {x[1]} sources on mapping annotation')
"""
return AnnotationTypes.MAPPING.name, x, str #value if value is not None else float('nan'), str
return AnnotationTypes.MAPPING.name, x, str


def _plugin_process(x: PluginBuilder, original_header: List = [] or None, file_path: str = None,
Expand Down
78 changes: 45 additions & 33 deletions openvariant/commands/openvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,85 +16,97 @@ def openvar():
pass


@openvar.command(name="cat", short_help='Concatenate files to standard input')
@openvar.command(name="cat", short_help='Concatenate parsed files to standard output.')
@click.argument('input_path', type=click.Path(exists=True), default='.')
@click.option('--where', '-w', type=click.STRING, default=None, help="Filter expression. eg: CHROMOSOME == 4")
@click.option('--annotations', '-a', type=click.Path(exists=True), default=None)
@click.option('--header', help="Show the result header", is_flag=True)
@click.option('--output', '-o', help="File to write the output.", default=None)
@click.option('--where', '-w', type=click.STRING, default=None, help="Condition expression. eg: CHROMOSOME == 4")
@click.option('--annotations', '-a', type=click.Path(exists=True), default=None,
help="Annotation path. eg: /path/annotation.yaml")
@click.option('--header', is_flag=True, help="Show the result header.")
@click.option('--output', '-o', default=None, help="File to write the output.")
def cat(input_path: str, where: str or None, annotations: str or None, header: bool, output: str or None):
"""Print the parsed files on the stdout/"output"."""
cat_task(input_path, annotations, where, header, output)


@openvar.command(name="count", short_help='Number of rows that matches a specified criterion')
@openvar.command(name="count", short_help='Number of rows that matches a specified criterion.')
@click.argument('input_path', type=click.Path(exists=True), default='.')
@click.option('--where', '-w', multiple=False, type=click.STRING, help="Filter expression. eg: CHROMOSOME == 4")
@click.option('--group_by', '-g', type=click.STRING, help="Filter expression. eg: CHROMOSOME")
@click.option('--annotations', '-a', default=None, type=click.Path(exists=True))
@click.option('--cores', '-c', help='Maximum processes to run in parallel.', type=click.INT, default=cpu_count())
@click.option('--quite', '-q', help="Don't show the progress, only the total count.", is_flag=True)
@click.option('--output', '-o', help="File to write the output.", default=None)
def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None, output:str or None) -> None:
@click.option('--where', '-w', multiple=False, type=click.STRING, help="Condition expression. eg: CHROMOSOME == 4")
@click.option('--group_by', '-g', type=click.STRING, help="Key to group rows. eg: COUNTRY")
@click.option('--annotations', '-a', default=None, type=click.Path(exists=True),
help="Annotation path. eg: /path/annotation.yaml")
@click.option('--cores', '-c', type=click.INT, default=cpu_count(), help='Maximum processes to run in parallel.')
@click.option('--quite', '-q', is_flag=True, help="Don't show the progress.")
@click.option('--output', '-o', default=None, help="File to write the output.")
def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None,
output: str or None) -> None:
"""Print on the stdout/"output" the number of rows that meets the criteria."""
result = count_task(input_path, annotations, group_by=group_by, where=where, cores=cores, quite=quite)
out_file = None
if output:
out_file = open(output, "w")
if len(result[1]) > 0:
for k, v in sorted(result[1].items(), key=lambda res: res[1]):
if output:
out_file.write("{}\t{}\n".format(k, v))
else: print("{}\t{}".format(k, v))

else:
print("{}\t{}".format(k, v))

if output:
out_file.write("TOTAL\t{}\n".format(result[0]))
else: print("TOTAL\t{}".format(result[0]))

if output: out_file.close()

else:
print("TOTAL\t{}".format(result[0]))

if output:
out_file.close()


@openvar.command(name="groupby", short_help='Groups rows that have the same values into summary rows')
@openvar.command(name="groupby", short_help='Group the parsed result for each different value of the specified key.')
@click.argument('input_path', type=click.Path(exists=True), default='.')
@click.option('--header', help='Send header as first row', is_flag=True)
@click.option('--show', help='Show group by each row', is_flag=True)
@click.option('--group_by', '-g', type=click.STRING, default=None, help="Filter expression. eg: CHROMOSOME")
@click.option('--header', is_flag=True, help="Show the result header.")
@click.option('--show', is_flag=True, help='Show group by each row.')
@click.option('--where', '-w', type=click.STRING, default=None, help="Filter expression. eg: CHROMOSOME == 4")
@click.option('--group_by', '-g', type=click.STRING, default=None, help="Key to group rows. eg: COUNTRY")
@click.option('--script', '-s', type=click.STRING, default=None,
help="Filter expression. eg: gzip > \${GROUP_KEY}.parsed.tsv.gz")
@click.option('--annotations', '-a', default=None, type=click.Path(exists=True))
@click.option('--cores', '-c', help='Maximum processes to run in parallel.', type=click.INT, default=cpu_count())
@click.option('--quite', '-q', help="Don't show the progress, only the total count.", is_flag=True)
@click.option('--annotations', '-a', default=None, type=click.Path(exists=True),
help="Annotation path. eg: /path/annotation.yaml")
@click.option('--cores', '-c', type=click.INT, default=cpu_count(), help='Maximum processes to run in parallel.')
@click.option('--quite', '-q', is_flag=True, help="Don't show the progress.")
@click.option('--output', '-o', help="File to write the output.", default=None)
def groupby(input_path: str, script: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None,
header: bool, show: bool, output: str or None):
"""Print on the stdout/"output" the parsed files group by a specified field."""
out_file = None
if output:
out_file = open(output, 'w')
for group_key, group_result, command in group_by_task(input_path, annotations, script, key_by=group_by, where=where,
cores=cores, quite=quite, header=header):
cores=cores, quite=quite, header=header):
for r in group_result:
if command:
if output:
out_file.write(f"{group_key}\t{r}\n") if show else out_file.write(f"{r}\n")
else: print(f"{group_key}\t{r}") if show else print(f"{r}")
else:
print(f"{group_key}\t{r}") if show else print(f"{r}")
else:
if header:
if output:
out_file.write(f"{r}\n")
else: print(f"{r}")
else:
print(f"{r}")
header = False
else:
if output:
out_file.write(f"{group_key}\t{r}\n") if show else out_file.write(f"{r}\n")
else: print(f"{group_key}\t{r}") if show else print(f"{r}")
else:
print(f"{group_key}\t{r}") if show else print(f"{r}")
if output:
out_file.close()

@openvar.command(name="plugin", short_help='Actions to execute for a plugin: create')

@openvar.command(name="plugin", short_help='Actions to execute for a plugin: create.')
@click.argument('action', type=click.Choice(['create']))
@click.option('--name', '-n', type=click.STRING)
@click.option('--directory', '-d', type=click.STRING)
@click.option('--name', '-n', type=click.STRING, help="Name of the plugin.")
@click.option('--directory', '-d', type=click.STRING, help="Directory to reach the plugin.")
def plugin(action, name: str or None, directory: str or None):
"""Actions to apply on the plugin system."""
PluginActions[action.upper()].value(name, directory)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from .find_files import find_files

__all__ = [
'find_files'
]
__all__ = ['find_files']
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
from fnmatch import fnmatch
from os import listdir
from os.path import isfile, join, isdir
from os.path import isfile, join, isdir, basename, dirname
from typing import Generator

from openvariant.annotation.annotation import Annotation
Expand Down Expand Up @@ -37,7 +37,11 @@ def _get_annotation(file_path, annotation):
def _find_files(base_path: str, annotation: Annotation or None, fix: bool) -> Generator[str, Annotation, None]:
"""Recursive exploration from a base path"""
if not fix:
for annotation_file in glob.iglob(join(base_path, "*.{}".format(ANNOTATION_EXTENSION))):
if isfile(base_path):
annotation_path = dirname(base_path)
else:
annotation_path = base_path
for annotation_file in glob.iglob(join(annotation_path, "*.{}".format(ANNOTATION_EXTENSION))):
annotation = Annotation(annotation_file)

if isdir(base_path):
Expand Down
7 changes: 1 addition & 6 deletions openvariant/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,4 @@
from .get_AF import Get_afPlugin, Get_afContext
from .alteration_type import Alteration_typePlugin, Alteration_typeContext

__all__ = [
'Plugin', 'Context',

'Get_afPlugin', 'Get_afContext',
'Alteration_typePlugin', 'Alteration_typeContext'
]
__all__ = ['Plugin', 'Context', 'Get_afPlugin', 'Get_afContext', 'Alteration_typePlugin', 'Alteration_typeContext']
2 changes: 1 addition & 1 deletion openvariant/plugins/alteration_type/alteration_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,5 @@ def run(self, context: Alteration_typeContext) -> str:

row[context.field_name] = alt_type
else:
raise ValueError("Unable to find 'REF', 'ALT' or 'POSITION' values in the row.")
raise ValueError("Unable to find_files 'REF', 'ALT' or 'POSITION' values in the row.")
return row[context.field_name]
2 changes: 1 addition & 1 deletion openvariant/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .count import count
from .groupby import group_by

__all__ = ['cat', 'count', 'group_by']
__all__ = ['cat', 'count', 'group_by']
12 changes: 8 additions & 4 deletions openvariant/tasks/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import List

from openvariant.annotation.config_annotation import AnnotationFormat
from openvariant.find.find_files import find_files
from openvariant.find_files.find_files import find_files
from openvariant.variant.variant import Variant


Expand All @@ -32,6 +32,7 @@ def cat(base_path: str, annotation_path: str or None = None, where: str = None,
header_show : bool
Shows header on the output.
"""
out_file = None
if output:
out_file = open(output, "w")
for file, annotation in find_files(base_path, annotation_path):
Expand All @@ -41,12 +42,15 @@ def cat(base_path: str, annotation_path: str or None = None, where: str = None,
if output:
out_file.write(_format_line(header, result.annotation.format))
out_file.write("\n")
else: print(_format_line(header, result.annotation.format))
else:
print(_format_line(header, result.annotation.format))
header_show = False
for i, r in enumerate(result.read(where=where)):
if isinstance(r, dict):
if output:
out_file.write(_format_line(list(map(str, r.values())), result.annotation.format))
out_file.write("\n")
else: print(_format_line(list(map(str, r.values())), result.annotation.format))
if output: out_file.close()
else:
print(_format_line(list(map(str, r.values())), result.annotation.format))
if output:
out_file.close()
2 changes: 1 addition & 1 deletion openvariant/tasks/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from tqdm import tqdm

from openvariant.annotation.annotation import Annotation
from openvariant.find.find_files import find_files
from openvariant.find_files.find_files import find_files

from openvariant.variant.variant import Variant

Expand Down
Loading

0 comments on commit 34109a2

Please sign in to comment.