Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions lib/galaxy/celery/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,17 @@ def purge_history_datasets(
if not history:
log.error(f"Purge history datasets task failed, history {request.history_id} not found")
return
# Bulk mark all non-deleted HDCAs as deleted
sa_session.execute(
update(model.HistoryDatasetCollectionAssociation)
.where(
and_(
model.HistoryDatasetCollectionAssociation.history_id == request.history_id,
model.HistoryDatasetCollectionAssociation.deleted == false(),
)
)
.values(deleted=True)
)
# Collect dataset IDs before the bulk update
dataset_id_stmt = (
select(model.HistoryDatasetAssociation.dataset_id)
Expand All @@ -152,6 +163,7 @@ def purge_history_datasets(
)
dataset_ids = list(sa_session.scalars(dataset_id_stmt))
if not dataset_ids:
sa_session.commit()
return
# Bulk mark all unpurged HDAs as deleted and purged
sa_session.execute(
Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/managers/histories.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,9 @@ def purge(self, item, flush=True, **kwargs):
for hda in item.datasets:
if not hda.purged:
self.hda_manager.purge(hda, flush=True, **kwargs)
for hdca in item.dataset_collections:
if not hdca.deleted:
hdca.deleted = True

# Now mark the history as purged
super().purge(item, flush=flush, **kwargs)
Expand Down
40 changes: 39 additions & 1 deletion test/integration/test_purge_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,22 @@
Optional,
)

from galaxy_test.base.populators import DatasetPopulator
from galaxy_test.base.populators import (
DatasetCollectionPopulator,
DatasetPopulator,
)
from galaxy_test.driver import integration_util


class TestPurgeDatasetsIntegration(integration_util.IntegrationTestCase):
dataset_populator: DatasetPopulator
dataset_collection_populator: DatasetCollectionPopulator
test_history_id: str

def setUp(self):
    """Prepare the populators and a fresh history for each test.

    Runs the parent ``setUp`` first, then builds a dataset populator and a
    dataset collection populator from the test case's Galaxy interactor, and
    finally stores the id returned by ``new_history()`` for the test to use.
    """
    super().setUp()
    interactor = self.galaxy_interactor
    self.dataset_populator = DatasetPopulator(interactor)
    self.dataset_collection_populator = DatasetCollectionPopulator(interactor)
    # The history is created through the dataset populator, so this must
    # come after the populator has been assigned.
    self.test_history_id = self.dataset_populator.new_history()

@classmethod
Expand Down Expand Up @@ -110,9 +115,42 @@ def test_purge_anonymous_history(self):

assert not self._file_exists_on_disk(dataset_file)

def test_purge_history_marks_collections_as_deleted(self):
    """Purging a history must also flag its dataset collections as deleted.

    Regression test for https://github.com/galaxyproject/galaxy/issues/22312
    """
    history_id = self.test_history_id

    def collection_is_deleted(collection_id):
        # Re-read the collection details each time so the check reflects
        # the state at the moment of the call.
        current = self.dataset_populator.get_history_collection_details(
            history_id, content_id=collection_id, wait=False
        )
        return current["deleted"]

    creation_response = self.dataset_collection_populator.create_list_in_history(
        history_id, direct_upload=False, wait=True
    )
    hdca_id = creation_response.json()["id"]

    # Sanity check: the freshly created collection starts out not deleted.
    assert not collection_is_deleted(hdca_id)

    purge_result = self.dataset_populator.purge_history(history_id)
    assert purge_result["purged"]

    # After the purge the collection must be reported as deleted.
    assert collection_is_deleted(hdca_id)

def _get_underlying_dataset_on_disk(self, hda_id: str) -> Optional[str]:
    """Return the ``file_name`` reported for the given dataset, if any.

    Issues an admin GET against ``datasets/<hda_id>`` and reads the
    ``file_name`` key from the JSON payload; ``None`` is returned when the
    key is absent.
    """
    return self._get(f"datasets/{hda_id}", admin=True).json().get("file_name")

def _file_exists_on_disk(self, filename: Optional[str]) -> bool:
return os.path.isfile(filename) if filename else False


class TestPurgeDatasetsWithoutCeleryIntegration(TestPurgeDatasetsIntegration):
    """Run the inherited purge tests with celery tasks turned off.

    Covers the history purge cascade to collections when
    ``enable_celery_tasks`` is disabled.
    """

    @classmethod
    def handle_galaxy_config_kwds(cls, config):
        """Apply the parent configuration, then override two options in place."""
        super().handle_galaxy_config_kwds(config)
        overrides = (
            ("enable_celery_tasks", False),
            ("metadata_strategy", "directory"),
        )
        # Assigned after the parent call so these values take precedence.
        for option, value in overrides:
            config[option] = value
Loading