From 9a13f5f6b379160e493578372c5b35683dea6de7 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 31 Oct 2023 12:46:57 +0100 Subject: [PATCH] Various fixes for stricter cwltool and cwltest --- lib/galaxy/tool_util/cwl/representation.py | 37 ++++++++++++++++++++++ lib/galaxy/tool_util/cwl/util.py | 8 ++++- lib/galaxy/tools/__init__.py | 33 ------------------- lib/galaxy_test/base/populators.py | 27 ++++++++++++++++ 4 files changed, 71 insertions(+), 34 deletions(-) diff --git a/lib/galaxy/tool_util/cwl/representation.py b/lib/galaxy/tool_util/cwl/representation.py index d2fcbe43cc4b..14aee19630e8 100644 --- a/lib/galaxy/tool_util/cwl/representation.py +++ b/lib/galaxy/tool_util/cwl/representation.py @@ -218,7 +218,43 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper): "archive_nameext": nameext, "archive_nameroot": nameroot, } + + def tar_to_directory(directory_item): + import tarfile + import uuid + + # TODO: Should we just make sure that archive exists in extra_files_path ?? + + tar_file_location = directory_item["archive_location"] + directory_name = directory_item["name"] + + assert os.path.exists(tar_file_location), tar_file_location + + tmp_dir = os.path.join(inputs_dir, "direx", str(uuid.uuid4())) # direx for "DIR EXtract" + directory_location = os.path.join(tmp_dir, directory_name) + + os.makedirs(tmp_dir) + + assert os.path.exists(tmp_dir), tmp_dir + + # TODO: safe version of this! + bkp_cwd = os.getcwd() + os.chdir(tmp_dir) + tar = tarfile.open(tar_file_location) + tar.extractall(directory_location) + tar.close() + os.chdir(bkp_cwd) + + assert os.path.exists(directory_location), directory_location + + directory_item["location"] = directory_location + directory_item["nameext"] = "None" + directory_item["nameroot"] = directory_name + directory_item["basename"] = directory_name + + tar_to_directory(directory_json) extra_params.update(directory_json) + entry_to_location(extra_params, extra_params["location"]) return extra_params @@ -227,6 +263,7 @@ def entry_to_location(entry: Dict[str, Any], parent_location: str): # TODO unit test if entry["class"] == "File" and "path" in entry and "location" not in entry: entry["location"] = os.path.join(parent_location, entry.pop("path")) + entry["size"] = os.path.getsize(entry["location"]) elif entry["class"] == "Directory" and "listing" in entry: if "location" not in entry and "path" in entry: entry["location"] = os.path.join(parent_location, entry.pop("path")) diff --git a/lib/galaxy/tool_util/cwl/util.py b/lib/galaxy/tool_util/cwl/util.py index 4c57663cb090..a86ad2b1d31a 100644 --- a/lib/galaxy/tool_util/cwl/util.py +++ b/lib/galaxy/tool_util/cwl/util.py @@ -571,7 +571,13 @@ def dir_listing(dir_path): if extra_file_class == "File": ec = get_dataset(output_metadata, filename=path) ec["basename"] = extra_file_basename - ec_properties = output_properties(pseudo_location=pseudo_location, **ec) + filename = f"{dir_path}/{extra_file_basename}" + _download_url = ( + output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(filename)}" + ) + ec_properties = output_properties( + pseudo_location=pseudo_location, download_url=_download_url, **ec + ) elif extra_file_class == "Directory": ec_properties = {} ec_properties["class"] = "Directory" diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index d0ebd01d3283..14737741162f 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3173,39 +3173,6 @@ def exec_before_job(self, app, inp_data, out_data, param_dict=None): # this really seems wrong -John input_json = {k: v for k, v in input_json.items() if v != ""} - # handle 'Directory' type (uncompress tar file) - for v in input_json.values(): - if isinstance(v, dict) and "class" in v and v["class"] == "Directory": - if "archive_nameext" in v and v["archive_nameext"] == ".tar": - tar_file_location = v["archive_location"] - directory_name = v["name"] - - assert os.path.exists(tar_file_location), tar_file_location - - tmp_dir = os.path.join( - local_working_directory, "direx", str(uuid.uuid4()) - ) # direx for "DIR EXtract" - directory_location = os.path.join(tmp_dir, directory_name) - - os.makedirs(tmp_dir) - - assert os.path.exists(tmp_dir), tmp_dir - - # TODO: safe version of this! - bkp_cwd = os.getcwd() - os.chdir(tmp_dir) - tar = tarfile.open(tar_file_location) - tar.extractall(directory_location) - tar.close() - os.chdir(bkp_cwd) - - assert os.path.exists(directory_location), directory_location - - v["location"] = directory_location - v["nameext"] = "None" - v["nameroot"] = directory_name - v["basename"] = directory_name - cwl_job_proxy = self._cwl_tool_proxy.job_proxy( input_json, output_dict, diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index b4acb65be5ff..6c092d409284 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -274,7 +274,21 @@ def to_local_location(listing, location): to_local_location(item["listing"], location=item["location"]) +def fix_conflicts(path): + # find first key that does not clash with an existing entry in targets + # start with entry.target + '_' + 2 and then keep incrementing + # the number till there is no clash + i = 2 + tgt = f"{path}_{i}" + while os.path.exists(tgt): + i += 1 + tgt = f"{path}_{i}" + return tgt + + def output_to_disk(output, download_folder): + if isinstance(output, list): + return [output_to_disk(item, download_folder=download_folder) for item in output] if isinstance(output, dict): if "secondaryFiles" in output: output["secondaryFiles"] = [ @@ -282,6 +296,8 @@ def output_to_disk(output, download_folder): ] if "basename" in output: download_path = os.path.join(download_folder, output["basename"]) + if os.path.exists(download_path): + download_path = fix_conflicts(download_path) if output["class"] == "Directory": zip_path = f"{download_path}.zip" download_to_file(output["location"], zip_path) @@ -293,6 +309,17 @@ def output_to_disk(output, download_folder): output["location"] = f"file://{download_path}" if "listing" in output: to_local_location(output["listing"], output["location"]) + + return output + elif output["class"] == "Directory": + # Directory in secondary files + download_folder = os.path.join(download_folder, output["location"]) + os.makedirs(download_folder, exist_ok=True) + output["location"] = download_folder + new_listing = [ + output_to_disk(secondary, download_folder=download_folder) for secondary in output["listing"] + ] + output["listing"] = new_listing return output else: new_output = {}