diff --git a/lib/galaxy/celery/tasks.py b/lib/galaxy/celery/tasks.py index 5e48a1e7b126..4a1cf31768e2 100644 --- a/lib/galaxy/celery/tasks.py +++ b/lib/galaxy/celery/tasks.py @@ -139,6 +139,17 @@ def purge_history_datasets( if not history: log.error(f"Purge history datasets task failed, history {request.history_id} not found") return + # Bulk mark all non-deleted HDCAs as deleted + sa_session.execute( + update(model.HistoryDatasetCollectionAssociation) + .where( + and_( + model.HistoryDatasetCollectionAssociation.history_id == request.history_id, + model.HistoryDatasetCollectionAssociation.deleted == false(), + ) + ) + .values(deleted=True) + ) # Collect dataset IDs before the bulk update dataset_id_stmt = ( select(model.HistoryDatasetAssociation.dataset_id) @@ -152,6 +163,7 @@ def purge_history_datasets( ) dataset_ids = list(sa_session.scalars(dataset_id_stmt)) if not dataset_ids: + sa_session.commit() return # Bulk mark all unpurged HDAs as deleted and purged sa_session.execute( diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index c357aef1af0d..8843574da8ed 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -308,6 +308,9 @@ def purge(self, item, flush=True, **kwargs): for hda in item.datasets: if not hda.purged: self.hda_manager.purge(hda, flush=True, **kwargs) + for hdca in item.dataset_collections: + if not hdca.deleted: + hdca.deleted = True # Now mark the history as purged super().purge(item, flush=flush, **kwargs) diff --git a/test/integration/test_purge_datasets.py b/test/integration/test_purge_datasets.py index d0d3c9b69490..1a129da03aa3 100644 --- a/test/integration/test_purge_datasets.py +++ b/test/integration/test_purge_datasets.py @@ -4,17 +4,22 @@ Optional, ) -from galaxy_test.base.populators import DatasetPopulator +from galaxy_test.base.populators import ( + DatasetCollectionPopulator, + DatasetPopulator, +) from galaxy_test.driver import integration_util class TestPurgeDatasetsIntegration(integration_util.IntegrationTestCase): dataset_populator: DatasetPopulator + dataset_collection_populator: DatasetCollectionPopulator test_history_id: str def setUp(self): super().setUp() self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + self.dataset_collection_populator = DatasetCollectionPopulator(self.galaxy_interactor) self.test_history_id = self.dataset_populator.new_history() @classmethod @@ -110,9 +115,42 @@ def test_purge_anonymous_history(self): assert not self._file_exists_on_disk(dataset_file) + def test_purge_history_marks_collections_as_deleted(self): + """Test that purging a history also marks its dataset collections as deleted. + + Regression test for https://github.com/galaxyproject/galaxy/issues/22312 + """ + hdca = self.dataset_collection_populator.create_list_in_history( + self.test_history_id, direct_upload=False, wait=True + ).json() + hdca_id = hdca["id"] + + details = self.dataset_populator.get_history_collection_details( + self.test_history_id, content_id=hdca_id, wait=False + ) + assert not details["deleted"] + + purge_result = self.dataset_populator.purge_history(self.test_history_id) + assert purge_result["purged"] + + details = self.dataset_populator.get_history_collection_details( + self.test_history_id, content_id=hdca_id, wait=False + ) + assert details["deleted"] + def _get_underlying_dataset_on_disk(self, hda_id: str) -> Optional[str]: detailed_response = self._get(f"datasets/{hda_id}", admin=True).json() return detailed_response.get("file_name") def _file_exists_on_disk(self, filename: Optional[str]) -> bool: return os.path.isfile(filename) if filename else False + + +class TestPurgeDatasetsWithoutCeleryIntegration(TestPurgeDatasetsIntegration): + """Test history purge cascades to collections without celery tasks.""" + + @classmethod + def handle_galaxy_config_kwds(cls, config): + super().handle_galaxy_config_kwds(config) + config["enable_celery_tasks"] = False + config["metadata_strategy"] = "directory"