From 7b8a4917e05b5c465d33bd9839766c4c470cce3d Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Fri, 15 Mar 2024 13:38:15 -0500 Subject: [PATCH 1/5] pass program threads to tools --- src/loaders/compute_tools/eggnog/eggnog.py | 4 ++-- src/loaders/compute_tools/eggnog/versions.yaml | 6 ++++++ src/loaders/compute_tools/mash/mash.py | 2 ++ src/loaders/compute_tools/mash/versions.yaml | 6 +++++- src/loaders/compute_tools/microtrait/microtrait.py | 8 +++++++- src/loaders/compute_tools/microtrait/versions.yaml | 6 +++++- src/loaders/compute_tools/tool_common.py | 1 + 7 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/loaders/compute_tools/eggnog/eggnog.py b/src/loaders/compute_tools/eggnog/eggnog.py index c278a2809..9ae48ae4b 100644 --- a/src/loaders/compute_tools/eggnog/eggnog.py +++ b/src/loaders/compute_tools/eggnog/eggnog.py @@ -12,7 +12,6 @@ from src.loaders.compute_tools.tool_common import ToolRunner, run_command INPUT_TYPE = 'proteins' -THREADS = 8 def _run_eggnog_single( @@ -20,6 +19,7 @@ def _run_eggnog_single( data_id: str, source_file: Path, output_dir: Path, + program_threads: int, debug: bool) -> None: metadata_file = output_dir / EGGNOG_METADATA @@ -33,7 +33,7 @@ def _run_eggnog_single( '-o', output_dir / source_file.name, # Output prefix. # Save result file to collectiondata directory. Expecting 'emapper.annotations', 'emapper.hits' and 'emapper.seed_orthologs' files. 
'--itype', f'{INPUT_TYPE}', - '--cpu', f'{THREADS}', + '--cpu', program_threads, '--excel', '--sensmode', 'fast', '--dmnd_iterate', 'no', diff --git a/src/loaders/compute_tools/eggnog/versions.yaml b/src/loaders/compute_tools/eggnog/versions.yaml index ece3f5da3..782f09efc 100644 --- a/src/loaders/compute_tools/eggnog/versions.yaml +++ b/src/loaders/compute_tools/eggnog/versions.yaml @@ -1,4 +1,10 @@ versions: - version: 0.1.0 date: 2024-03-13 + reference_db_version: 5.0.2 + + - version: 0.1.1 + date: 2024-03-15 + notes: | + - add ability to specify thread number for execution reference_db_version: 5.0.2 \ No newline at end of file diff --git a/src/loaders/compute_tools/mash/mash.py b/src/loaders/compute_tools/mash/mash.py index 32c45c2ac..a21ee12d2 100644 --- a/src/loaders/compute_tools/mash/mash.py +++ b/src/loaders/compute_tools/mash/mash.py @@ -16,6 +16,7 @@ def _run_mash_single( data_id: str, source_file: Path, output_dir: Path, + program_threads: int, debug: bool, kmer_size: int = KMER_SIZE, sketch_size: int = SKETCH_SIZE) -> None: @@ -25,6 +26,7 @@ def _run_mash_single( # Save result file to source file directory. The suffix '.msh' will be appended. 
'-k', f'{kmer_size}', '-s', f'{sketch_size}', + '-p', program_threads, source_file] run_command(command, output_dir if debug else None) diff --git a/src/loaders/compute_tools/mash/versions.yaml b/src/loaders/compute_tools/mash/versions.yaml index 2818a86fe..ad8d3ef57 100644 --- a/src/loaders/compute_tools/mash/versions.yaml +++ b/src/loaders/compute_tools/mash/versions.yaml @@ -2,4 +2,8 @@ versions: - version: 0.1.0 date: 2023-07-18 - version: 0.1.1 - date: 2023-07-19 \ No newline at end of file + date: 2023-07-19 + - version: 0.1.2 + date: 2024-03-15 + notes: | + - add ability to specify thread number for execution \ No newline at end of file diff --git a/src/loaders/compute_tools/microtrait/microtrait.py b/src/loaders/compute_tools/microtrait/microtrait.py index 57f81c021..6877bc2d7 100644 --- a/src/loaders/compute_tools/microtrait/microtrait.py +++ b/src/loaders/compute_tools/microtrait/microtrait.py @@ -186,7 +186,13 @@ def _process_trait_counts( return heatmap_row, cells_meta, traits_meta -def _run_microtrait(tool_safe_data_id: str, data_id: str, fna_file: Path, genome_dir: Path, debug: bool): +def _run_microtrait( + tool_safe_data_id: str, + data_id: str, + fna_file: Path, + genome_dir: Path, + program_threads: int, + debug: bool): # run microtrait.extract_traits on the genome file # https://github.com/ukaraoz/microtrait diff --git a/src/loaders/compute_tools/microtrait/versions.yaml b/src/loaders/compute_tools/microtrait/versions.yaml index f2180a8ad..083f830b1 100644 --- a/src/loaders/compute_tools/microtrait/versions.yaml +++ b/src/loaders/compute_tools/microtrait/versions.yaml @@ -12,4 +12,8 @@ versions: - version: 0.1.3 date: 2023-10-16 notes: | - - fix cells value data type \ No newline at end of file + - fix cells value data type + - version: 0.1.4 + date: 2024-03-15 + notes: | + - add ability to specify thread number for execution \ No newline at end of file diff --git a/src/loaders/compute_tools/tool_common.py 
b/src/loaders/compute_tools/tool_common.py index 566f2df3d..b98ee54d3 100644 --- a/src/loaders/compute_tools/tool_common.py +++ b/src/loaders/compute_tools/tool_common.py @@ -298,6 +298,7 @@ def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Pat meta.get(loader_common_names.META_UNCOMPRESSED_FILE, meta[loader_common_names.META_SOURCE_FILE]), output_dir, + self._program_threads, self._debug)) try: From ffcc686461abb1948c0cd8c3a8a4d3f9b86e1e3a Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Fri, 15 Mar 2024 15:48:46 -0500 Subject: [PATCH 2/5] fix command line --- src/loaders/compute_tools/eggnog/eggnog.py | 2 +- src/loaders/compute_tools/mash/mash.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/loaders/compute_tools/eggnog/eggnog.py b/src/loaders/compute_tools/eggnog/eggnog.py index 9ae48ae4b..f61b8a944 100644 --- a/src/loaders/compute_tools/eggnog/eggnog.py +++ b/src/loaders/compute_tools/eggnog/eggnog.py @@ -33,7 +33,7 @@ def _run_eggnog_single( '-o', output_dir / source_file.name, # Output prefix. # Save result file to collectiondata directory. Expecting 'emapper.annotations', 'emapper.hits' and 'emapper.seed_orthologs' files. '--itype', f'{INPUT_TYPE}', - '--cpu', program_threads, + '--cpu', f'{program_threads}', '--excel', '--sensmode', 'fast', '--dmnd_iterate', 'no', diff --git a/src/loaders/compute_tools/mash/mash.py b/src/loaders/compute_tools/mash/mash.py index a21ee12d2..82d72ddd4 100644 --- a/src/loaders/compute_tools/mash/mash.py +++ b/src/loaders/compute_tools/mash/mash.py @@ -26,7 +26,7 @@ def _run_mash_single( # Save result file to source file directory. The suffix '.msh' will be appended. 
'-k', f'{kmer_size}', '-s', f'{sketch_size}', - '-p', program_threads, + '-p', f'{program_threads}', source_file] run_command(command, output_dir if debug else None) From bc63b8e0fff07d3a4690509724ae355fffd4614a Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Fri, 15 Mar 2024 15:55:44 -0500 Subject: [PATCH 3/5] update arg type --- src/loaders/compute_tools/tool_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/loaders/compute_tools/tool_common.py b/src/loaders/compute_tools/tool_common.py index b98ee54d3..97a905691 100644 --- a/src/loaders/compute_tools/tool_common.py +++ b/src/loaders/compute_tools/tool_common.py @@ -246,7 +246,7 @@ def _get_data_ids(self): data_ids = all_data_ids return list(set(data_ids)) - def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Path, bool], None], unzip=False): + def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Path, int, bool], None], unzip=False): """ Run a tool by a single data file, storing the results in a single batch directory with the individual runs stored in directories by the data ID. 
@@ -379,7 +379,7 @@ def _execute( self, threads: int, tool_callable: Callable[..., None], - args: List[Tuple[Dict[str, GenomeTuple], Path, int, bool]], + args: List[Tuple[Dict[str, GenomeTuple], Path, int, bool]] | List[Tuple[str, str, Path, Path, int, bool]], start: datetime.datetime, total: bool, ): From 7ec8b0756d60af72998d736f91fc7d81af6f6ec3 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Fri, 15 Mar 2024 18:37:57 -0500 Subject: [PATCH 4/5] add comments to microtrait script --- src/loaders/compute_tools/microtrait/microtrait.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/loaders/compute_tools/microtrait/microtrait.py b/src/loaders/compute_tools/microtrait/microtrait.py index 6877bc2d7..77aba3262 100644 --- a/src/loaders/compute_tools/microtrait/microtrait.py +++ b/src/loaders/compute_tools/microtrait/microtrait.py @@ -204,6 +204,10 @@ def _run_microtrait( # object returned by the # extract_traits function. + # program_threads is not used in this function, but it is kept for consistency with other tools (e.g., eggnog, mash) + # since extract_traits function doesn't take the number of threads as an argument + # https://github.com/ukaraoz/microtrait/blob/master/R/extract_traits.R#L22-L26 + # Load the R script as an R function r_script = """ library(microtrait) From 8e4a516de880b626842541ff7ff30a645fdd6e07 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Fri, 15 Mar 2024 18:43:11 -0500 Subject: [PATCH 5/5] remove microtrait release --- src/loaders/compute_tools/microtrait/versions.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/loaders/compute_tools/microtrait/versions.yaml b/src/loaders/compute_tools/microtrait/versions.yaml index 083f830b1..f2180a8ad 100644 --- a/src/loaders/compute_tools/microtrait/versions.yaml +++ b/src/loaders/compute_tools/microtrait/versions.yaml @@ -12,8 +12,4 @@ versions: - version: 0.1.3 date: 2023-10-16 notes: | - - fix cells value data type - - version: 0.1.4 - date: 2024-03-15 - notes: 
| - - add ability to specify thread number for execution \ No newline at end of file + - fix cells value data type \ No newline at end of file