Skip to content

Commit

Permalink
Merge pull request #15305 from davelopez/22.05_fix_bulk_dataset_purge
Browse files Browse the repository at this point in the history
[22.05] Fix immediate dataset purge in bulk
  • Loading branch information
mvdbeek authored Jan 16, 2023
2 parents ca77e18 + 543bb90 commit e3e61fb
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 2 deletions.
2 changes: 1 addition & 1 deletion lib/galaxy/webapps/galaxy/services/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ def delete_batch(
if dataset.src == DatasetSourceType.hda:
self.hda_manager.error_if_uploading(dataset_instance)
if payload.purge:
manager.purge(dataset_instance, flush=False)
manager.purge(dataset_instance, flush=True)
else:
manager.delete(dataset_instance, flush=False)
success_count += 1
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/webapps/galaxy/services/history_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -1609,7 +1609,7 @@ def _purge(self, item: HistoryItemModel, trans: ProvidesHistoryContext):
return
if isinstance(item, HistoryDatasetCollectionAssociation):
return self.dataset_collection_manager.delete(trans, "history", item.id, recursive=True, purge=True)
self.hda_manager.purge(item, flush=self.flush)
self.hda_manager.purge(item, flush=True)

def _change_datatype(
self, item: HistoryItemModel, params: ChangeDatatypeOperationParams, trans: ProvidesHistoryContext
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy_test/api/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,8 @@ def test_delete_batch(self):
hda = self.dataset_populator.new_dataset(history_id)
dataset_map[index] = hda["id"]

self.dataset_populator.wait_for_history(history_id)

expected_deleted_source_ids = [
{"id": dataset_map[1], "src": "hda"},
{"id": dataset_map[2], "src": "hda"},
Expand Down
81 changes: 81 additions & 0 deletions test/integration/test_purge_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import os
from typing import (
Callable,
Optional,
)

from galaxy_test.base.populators import DatasetPopulator
from galaxy_test.driver import integration_util


class PurgeDatasetsIntegrationTestCase(integration_util.IntegrationTestCase):
    """Integration tests asserting that purging an HDA removes its backing file.

    Covers both purge entry points: the batch ``DELETE /api/datasets`` endpoint
    and the ``PUT /api/histories/{id}/contents/bulk`` operation.
    """

    def setUp(self):
        super().setUp()
        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
        self.history_id = self.dataset_populator.new_history()

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        super().handle_galaxy_config_kwds(config)
        # Users must be permitted to purge their own datasets for these tests.
        config["allow_user_dataset_purge"] = True

    def test_purge_dataset_batch_removes_underlying_dataset_from_disk(self):
        self._expect_dataset_purged_on(self._purge_hda_using_batch)

    def test_purge_history_content_bulk_removes_underlying_dataset_from_disk(self):
        self._expect_dataset_purged_on(self._purge_hda_using_bulk)

    def _expect_dataset_purged_on(self, purge_operation: Callable):
        """Create a dataset, purge it via *purge_operation*, and verify the
        file disappears from disk once the purge completes."""
        new_hda = self.dataset_populator.new_dataset(self.history_id, wait=True)
        hda_id = new_hda["id"]

        # The dataset's backing file must exist before we purge.
        file_path = self._get_underlying_dataset_on_disk(hda_id)
        assert self._file_exists_on_disk(file_path)

        purge_operation(hda_id)

        # Purging may be asynchronous; wait until the API reports it purged.
        self.dataset_populator.wait_for_purge(self.history_id, hda_id)

        # The backing file should now be gone.
        assert not self._file_exists_on_disk(file_path)

    def _purge_hda_using_batch(self, hda_id):
        """Purge one HDA through the batch delete endpoint."""
        request_body = {
            "purge": True,
            "datasets": [{"id": hda_id, "src": "hda"}],
        }
        response = self._delete("datasets", data=request_body, json=True)
        self._assert_status_code_is_ok(response)
        assert response.json()["success_count"] == 1

    def _purge_hda_using_bulk(self, hda_id):
        """Purge one HDA through the history-contents bulk operation."""
        request_body = {
            "operation": "purge",
            "items": [{"id": hda_id, "history_content_type": "dataset"}],
        }
        response = self._put(
            f"histories/{self.history_id}/contents/bulk",
            data=request_body,
            json=True,
        )
        self._assert_status_code_is_ok(response)
        assert response.json()["success_count"] == 1

    def _get_underlying_dataset_on_disk(self, hda_id: str) -> Optional[str]:
        """Return the dataset's on-disk path as reported by the admin API."""
        details = self._get(f"datasets/{hda_id}", admin=True).json()
        return details.get("file_name")

    def _file_exists_on_disk(self, filename: Optional[str]) -> bool:
        """True when *filename* is a non-empty path to an existing regular file."""
        if not filename:
            return False
        return os.path.isfile(filename)

0 comments on commit e3e61fb

Please sign in to comment.