Skip to content

Commit

Permalink
Various fixes for stricter cwltool and cwltest
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Oct 31, 2023
1 parent b8c9968 commit 7fbcb28
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 34 deletions.
37 changes: 37 additions & 0 deletions lib/galaxy/tool_util/cwl/representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,43 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
"archive_nameext": nameext,
"archive_nameroot": nameroot,
}

def tar_to_directory(directory_item):
import tarfile
import uuid

# TODO: Should we just make sure that archive exists in extra_files_path ??

tar_file_location = directory_item["archive_location"]
directory_name = directory_item["name"]

assert os.path.exists(tar_file_location), tar_file_location

tmp_dir = os.path.join(inputs_dir, "direx", str(uuid.uuid4())) # direx for "DIR EXtract"
directory_location = os.path.join(tmp_dir, directory_name)

os.makedirs(tmp_dir)

assert os.path.exists(tmp_dir), tmp_dir

# TODO: safe version of this!
bkp_cwd = os.getcwd()
os.chdir(tmp_dir)
tar = tarfile.open(tar_file_location)
tar.extractall(directory_location)
tar.close()
os.chdir(bkp_cwd)

assert os.path.exists(directory_location), directory_location

directory_item["location"] = directory_location
directory_item["nameext"] = "None"
directory_item["nameroot"] = directory_name
directory_item["basename"] = directory_name

tar_to_directory(directory_json)
extra_params.update(directory_json)

entry_to_location(extra_params, extra_params["location"])
return extra_params

Expand All @@ -227,6 +263,7 @@ def entry_to_location(entry: Dict[str, Any], parent_location: str):
# TODO unit test
if entry["class"] == "File" and "path" in entry and "location" not in entry:
entry["location"] = os.path.join(parent_location, entry.pop("path"))
entry["size"] = os.path.getsize(entry["location"])
elif entry["class"] == "Directory" and "listing" in entry:
if "location" not in entry and "path" in entry:
entry["location"] = os.path.join(parent_location, entry.pop("path"))
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tool_util/cwl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,13 @@ def dir_listing(dir_path):
if extra_file_class == "File":
ec = get_dataset(output_metadata, filename=path)
ec["basename"] = extra_file_basename
ec_properties = output_properties(pseudo_location=pseudo_location, **ec)
filename = f"{dir_path}/{extra_file_basename}"
_download_url = (
output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(filename)}"
)
ec_properties = output_properties(
pseudo_location=pseudo_location, download_url=_download_url, **ec
)
elif extra_file_class == "Directory":
ec_properties = {}
ec_properties["class"] = "Directory"
Expand Down
33 changes: 0 additions & 33 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3173,39 +3173,6 @@ def exec_before_job(self, app, inp_data, out_data, param_dict=None):
# this really seems wrong -John
input_json = {k: v for k, v in input_json.items() if v != ""}

# handle 'Directory' type (uncompress tar file)
for v in input_json.values():
if isinstance(v, dict) and "class" in v and v["class"] == "Directory":
if "archive_nameext" in v and v["archive_nameext"] == ".tar":
tar_file_location = v["archive_location"]
directory_name = v["name"]

assert os.path.exists(tar_file_location), tar_file_location

tmp_dir = os.path.join(
local_working_directory, "direx", str(uuid.uuid4())
) # direx for "DIR EXtract"
directory_location = os.path.join(tmp_dir, directory_name)

os.makedirs(tmp_dir)

assert os.path.exists(tmp_dir), tmp_dir

# TODO: safe version of this!
bkp_cwd = os.getcwd()
os.chdir(tmp_dir)
tar = tarfile.open(tar_file_location)
tar.extractall(directory_location)
tar.close()
os.chdir(bkp_cwd)

assert os.path.exists(directory_location), directory_location

v["location"] = directory_location
v["nameext"] = "None"
v["nameroot"] = directory_name
v["basename"] = directory_name

cwl_job_proxy = self._cwl_tool_proxy.job_proxy(
input_json,
output_dict,
Expand Down
16 changes: 16 additions & 0 deletions lib/galaxy_test/base/populators.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ def to_local_location(listing, location):


def output_to_disk(output, download_folder):
if isinstance(output, list):
return [output_to_disk(item, download_folder=download_folder) for item in output]
if isinstance(output, dict):
if "secondaryFiles" in output:
output["secondaryFiles"] = [
Expand All @@ -288,11 +290,25 @@ def output_to_disk(output, download_folder):
CompressedFile(zip_path).extract(download_folder)
os.remove(zip_path)
else:
# bit of a hack, to discriminate files with same basename
# cwltool suffixes with _2, _3 but doesn't really matter for us.
download_path = f"{output['checksum']}.{download_path}"
download_to_file(output["location"], download_path)
output["path"] = download_path
output["location"] = f"file://{download_path}"
if "listing" in output:
to_local_location(output["listing"], output["location"])

return output
elif output["class"] == "Directory":
# Directory in secondary files
download_folder = os.path.join(download_folder, output["location"])
os.makedirs(download_folder, exist_ok=True)
output["location"] = download_folder
new_listing = [
output_to_disk(secondary, download_folder=download_folder) for secondary in output["listing"]
]
output["listing"] = new_listing
return output
else:
new_output = {}
Expand Down

0 comments on commit 7fbcb28

Please sign in to comment.