diff --git a/src/loaders/compute_tools/eggnog/eggnog.py b/src/loaders/compute_tools/eggnog/eggnog.py index c278a2809..f61b8a944 100644 --- a/src/loaders/compute_tools/eggnog/eggnog.py +++ b/src/loaders/compute_tools/eggnog/eggnog.py @@ -12,7 +12,6 @@ from src.loaders.compute_tools.tool_common import ToolRunner, run_command INPUT_TYPE = 'proteins' -THREADS = 8 def _run_eggnog_single( @@ -20,6 +19,7 @@ def _run_eggnog_single( data_id: str, source_file: Path, output_dir: Path, + program_threads: int, debug: bool) -> None: metadata_file = output_dir / EGGNOG_METADATA @@ -33,7 +33,7 @@ def _run_eggnog_single( '-o', output_dir / source_file.name, # Output prefix. # Save result file to collectiondata directory. Expecting 'emapper.annotations', 'emapper.hits' and 'emapper.seed_orthologs' files. '--itype', f'{INPUT_TYPE}', - '--cpu', f'{THREADS}', + '--cpu', f'{program_threads}', '--excel', '--sensmode', 'fast', '--dmnd_iterate', 'no', diff --git a/src/loaders/compute_tools/eggnog/versions.yaml b/src/loaders/compute_tools/eggnog/versions.yaml index ece3f5da3..782f09efc 100644 --- a/src/loaders/compute_tools/eggnog/versions.yaml +++ b/src/loaders/compute_tools/eggnog/versions.yaml @@ -1,4 +1,10 @@ versions: - version: 0.1.0 date: 2024-03-13 + reference_db_version: 5.0.2 + + - version: 0.1.1 + date: 2024-03-15 + notes: | + - add ability to specify thread number for execution reference_db_version: 5.0.2 \ No newline at end of file diff --git a/src/loaders/compute_tools/mash/mash.py b/src/loaders/compute_tools/mash/mash.py index 32c45c2ac..82d72ddd4 100644 --- a/src/loaders/compute_tools/mash/mash.py +++ b/src/loaders/compute_tools/mash/mash.py @@ -16,6 +16,7 @@ def _run_mash_single( data_id: str, source_file: Path, output_dir: Path, + program_threads: int, debug: bool, kmer_size: int = KMER_SIZE, sketch_size: int = SKETCH_SIZE) -> None: @@ -25,6 +26,7 @@ def _run_mash_single( # Save result file to source file directory. The suffix '.msh' will be appended. 
'-k', f'{kmer_size}', '-s', f'{sketch_size}', + '-p', f'{program_threads}', source_file] run_command(command, output_dir if debug else None) diff --git a/src/loaders/compute_tools/mash/versions.yaml b/src/loaders/compute_tools/mash/versions.yaml index 2818a86fe..ad8d3ef57 100644 --- a/src/loaders/compute_tools/mash/versions.yaml +++ b/src/loaders/compute_tools/mash/versions.yaml @@ -2,4 +2,8 @@ versions: - version: 0.1.0 date: 2023-07-18 - version: 0.1.1 - date: 2023-07-19 \ No newline at end of file + date: 2023-07-19 + - version: 0.1.2 + date: 2024-03-15 + notes: | + - add ability to specify thread number for execution \ No newline at end of file diff --git a/src/loaders/compute_tools/microtrait/microtrait.py b/src/loaders/compute_tools/microtrait/microtrait.py index 57f81c021..77aba3262 100644 --- a/src/loaders/compute_tools/microtrait/microtrait.py +++ b/src/loaders/compute_tools/microtrait/microtrait.py @@ -186,7 +186,13 @@ def _process_trait_counts( return heatmap_row, cells_meta, traits_meta -def _run_microtrait(tool_safe_data_id: str, data_id: str, fna_file: Path, genome_dir: Path, debug: bool): +def _run_microtrait( + tool_safe_data_id: str, + data_id: str, + fna_file: Path, + genome_dir: Path, + program_threads: int, + debug: bool): # run microtrait.extract_traits on the genome file # https://github.com/ukaraoz/microtrait @@ -198,6 +204,10 @@ def _run_microtrait(tool_safe_data_id: str, data_id: str, fna_file: Path, genome # object returned by the # extract_traits function. 
+ # program_threads is not used in this function, but it is kept for consistency with other tools (e.g., eggnog, mash) + # since the extract_traits function doesn't take the number of threads as an argument + # https://github.com/ukaraoz/microtrait/blob/master/R/extract_traits.R#L22-L26 + # Load the R script as an R function r_script = """ library(microtrait) diff --git a/src/loaders/compute_tools/tool_common.py b/src/loaders/compute_tools/tool_common.py index 566f2df3d..97a905691 100644 --- a/src/loaders/compute_tools/tool_common.py +++ b/src/loaders/compute_tools/tool_common.py @@ -246,7 +246,7 @@ def _get_data_ids(self): data_ids = all_data_ids return list(set(data_ids)) - def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Path, bool], None], unzip=False): + def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Path, int, bool], None], unzip=False): """ Run a tool by a single data file, storing the results in a single batch directory with the individual runs stored in directories by the data ID. @@ -298,6 +298,7 @@ def parallel_single_execution(self, tool_callable: Callable[[str, str, Path, Pat meta.get(loader_common_names.META_UNCOMPRESSED_FILE, meta[loader_common_names.META_SOURCE_FILE]), output_dir, + self._program_threads, self._debug)) try: @@ -378,7 +379,7 @@ def _execute( self, threads: int, tool_callable: Callable[..., None], - args: List[Tuple[Dict[str, GenomeTuple], Path, int, bool]], + args: List[Tuple[Dict[str, GenomeTuple], Path, int, bool]] | List[Tuple[str, str, Path, Path, int, bool]], start: datetime.datetime, total: bool, ):