add reviewer suggestions #258
Daniel-VM committed Apr 12, 2024
1 parent bdbb1d3 commit 1f28585
Showing 4 changed files with 192 additions and 47 deletions.
1 change: 0 additions & 1 deletion relecov_tools/__main__.py
@@ -17,7 +17,6 @@
import relecov_tools.map_schema
import relecov_tools.feed_database
import relecov_tools.read_bioinfo_metadata
import relecov_tools.long_table_parse
import relecov_tools.metadata_homogeneizer
import relecov_tools.gisaid_upload
import relecov_tools.upload_ena_protocol
75 changes: 60 additions & 15 deletions relecov_tools/assets/pipeline_utils/viralrecon.py
@@ -30,7 +30,6 @@ class LongTableParse:
- saving_file(generated_JSON)
- parsing_csv() : It manages this whole process:
- calling first to parse_a_list_of_dictionaries() and then calling to saving_file()
"""

def __init__(self, file_path=None, output_directory=None):
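For orientation, here is a minimal usage sketch of the class based only on the constructor signature and the docstring above; the module path is assumed to mirror the file location shown in this diff, and the paths are hypothetical:

from relecov_tools.assets.pipeline_utils.viralrecon import LongTableParse

# Hypothetical input/output locations; parsing_csv() drives the whole
# parse-and-save workflow described in the class docstring.
long_table = LongTableParse(
    file_path="analysis_results/variants_long_table_20220830.csv",
    output_directory="relecov_output",
)
long_table.parsing_csv()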
@@ -136,17 +135,32 @@ def convert_to_json(self, samp_dict):
j_list = []
# Grab date from filename
result_regex = re.search(
"variants_long_table_(.*).csv", os.path.basename(self.file_path)
"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
)
if result_regex is None:
log.error("Analysis date not found in filename, aborting")
stderr.print(
"[red]Error: filename must include analysis date in format YYYYMMDD"
"[red]\tWARN: Couldn't find variants long table file. Expected file name is:"
)
stderr.print(
"[red]\t\t- variants_long_table.csv or variants_long_table_YYYYMMDD.csv. Aborting..."
)
stderr.print("[red]e.g. variants_long_table_20220830.csv")
sys.exit(1)
else:
date_regex = re.search(r"(\d{8})", result_regex.group())
if date_regex is not None:
analysis_date = date_regex.group()
stderr.print(
f"[green]\tDate {analysis_date} found in {self.file_path}"
)
else:
analysis_date = "Not Provided [GENEPIO:0001668]"
stderr.print(
f"[yellow]\tWARN:No analysis date found in long table: {self.file_path}"
)

for key, values in samp_dict.items():
j_dict = {"sample_name": key, "analysis_date": result_regex.group(1)}
j_dict = {"sample_name": key, "analysis_date": analysis_date.group()}
j_dict["variants"] = values
j_list.append(j_dict)
return j_list
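As a standalone illustration (not part of the commit), the updated handling accepts both the dated and the undated long-table filename and extracts the analysis date in a second step, falling back to the GENEPIO placeholder when no date is present:

import re

for name in ("variants_long_table.csv", "variants_long_table_20220830.csv"):
    match = re.search(r"variants_long_table(?:_\d{8})?\.csv", name)
    if match is None:
        continue  # the real method logs an error and aborts here
    date_match = re.search(r"(\d{8})", match.group())
    analysis_date = date_match.group() if date_match else "Not Provided [GENEPIO:0001668]"
    print(name, "->", analysis_date)
# variants_long_table.csv -> Not Provided [GENEPIO:0001668]
# variants_long_table_20220830.csv -> 20220830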
@@ -182,12 +196,20 @@ def parsing_csv(self):

# START util functions
def handle_pangolin_data(files_list):
"""File handler to parse pangolin data (csv) into JSON structured format."""
"""File handler to parse pangolin data (csv) into JSON structured format.
Args:
files_list (list): A list with paths to pangolin files.
Returns:
pango_data_processed: A dictionary containing the processed pangolin data.
"""
method_name = f"{handle_pangolin_data.__name__}"
method_log_report = BioinfoReportLog()

# Handling pangolin data
pango_data_processed = {}
valid_samples = []
try:
files_list_processed = relecov_tools.utils.select_most_recent_files_per_sample(
files_list
@@ -208,23 +230,39 @@
key.split()[0]: value for key, value in pango_data.items()
}
pango_data_processed.update(pango_data_updated)
method_log_report.update_log_report(
method_name, "valid", f"Successfully handled data in {pango_file}."
)
valid_samples.append(pango_data_key.split()[0])
except (FileNotFoundError, IndexError) as e:
method_log_report.update_log_report(
method_name, "error", f"Error processing file {pango_file}: {e}"
method_name,
"warning",
f"Error occurred while processing file {pango_file}: {e}",
)
sys.exit(method_log_report.print_log_report(method_name, ["error"]))
continue
except Exception as e:
method_log_report.update_log_report(
method_name, "error", f"Error occurred while processing files: {e}"
method_name, "warning", f"Error occurred while processing files: {e}"
)
if len(valid_samples) > 0:
method_log_report.update_log_report(
method_name,
"valid",
f"Successfully handled data in samples: {', '.join(valid_samples)}",
)
sys.exit(method_log_report.print_log_report(method_name, ["error"]))
method_log_report.print_log_report(method_name, ["valid", "warning"])
return pango_data_processed
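The control-flow change above means that a malformed pangolin file no longer aborts the run: per-file failures are logged as warnings, the loop continues, and the successfully handled samples are reported once at the end. A minimal self-contained sketch of that warn-and-continue pattern, using plain printing in place of BioinfoReportLog and a caller-supplied parse_one callback as a stand-in for the pangolin CSV parsing:

def handle_files(files_list, parse_one):
    """Skip files that fail to parse and report the valid samples once at the end."""
    processed = {}
    valid_samples = []
    for path in files_list:
        try:
            sample, data = parse_one(path)
        except (FileNotFoundError, IndexError) as err:
            print(f"WARNING: error occurred while processing file {path}: {err}")
            continue
        processed[sample] = data
        valid_samples.append(sample)
    if valid_samples:
        print(f"VALID: successfully handled data in samples: {', '.join(valid_samples)}")
    return processed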


def parse_long_table(files_list):
"""File handler to retrieve data from long table files and convert it into a JSON structured format.
This function utilizes the LongTableParse class to parse the long table data.
Since this utility handles and maps data in a custom way, it returns None to avoid being passed to read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
Args:
files_list (list): A list of paths to long table files.
Returns:
None: Indicates that the function does not return any meaningful value.
"""
method_name = f"{parse_long_table.__name__}"
method_log_report = BioinfoReportLog()

@@ -250,7 +288,14 @@


def handle_consensus_fasta(files_list):
"""File handler to parse consensus fasta data (*.consensus.fa) into JSON structured format"""
"""File handler to parse consensus data (fasta) into JSON structured format.
Args:
files_list (list): A list with paths to consensus files.
Returns:
consensus_data_processed: A dictionary containing the processed consensus data.
"""
method_name = f"{handle_consensus_fasta.__name__}"
method_log_report = BioinfoReportLog()

4 changes: 2 additions & 2 deletions relecov_tools/conf/bioinfo_config.json
@@ -1,7 +1,7 @@
{
"viralrecon": {
"mapping_stats": {
"fn": "mapping_illumina_stats.tab",
"fn": "mapping_illumina_stats(?:_\\d{8})?\\.tab",
"sample_col_idx": 5,
"header_row_idx": 1,
"required": true,
@@ -37,7 +37,7 @@
}
},
"variants_long_table": {
"fn": "variants_long_table(?:_\\d{8})?\\.csv$",
"fn": "variants_long_table(?:_\\d{8})?\\.csv",
"sample_col_idx": 1,
"header_row_idx": 1,
"required": true,
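Both fn patterns in bioinfo_config.json now treat the _YYYYMMDD suffix as optional, and the trailing $ anchor was dropped from the variants_long_table entry. The snippet below is an illustrative check (not part of the commit) of how the patterns behave once the JSON escaping is resolved:

import re

# Patterns as they read after JSON unescaping ("\\d" in the file becomes "\d").
patterns = {
    "mapping_stats": r"mapping_illumina_stats(?:_\d{8})?\.tab",
    "variants_long_table": r"variants_long_table(?:_\d{8})?\.csv",
}
for name in (
    "mapping_illumina_stats.tab",
    "mapping_illumina_stats_20220830.tab",
    "variants_long_table_20220830.csv",
):
    matched = [key for key, pattern in patterns.items() if re.search(pattern, name)]
    print(name, "->", matched)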