Skip to content

Commit 4364049

Browse files
committed
Optimize metadata reset to clone repository once
Previously, reset_all_metadata_on_repository_in_tool_shed() cloned the repository over HTTP separately for each changeset revision. With 6 changesets at ~6-7 seconds per clone, this took ~40 seconds in total, exceeding Playwright's 30-second timeout. The repository is now cloned once, and hg update is used to switch between revisions. This reduces 6 network clones to 1, so the operation completes well within the timeout.
1 parent c147814 commit 4364049

2 files changed

Lines changed: 24 additions & 11 deletions

File tree

lib/galaxy/tool_shed/metadata/metadata_generator.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,9 @@ def generate_metadata_for_changeset_revision(self):
392392
if invalid_tool_configs:
393393
metadata_dict["invalid_tools"] = invalid_tool_configs
394394
self.metadata_dict = metadata_dict
395-
remove_dir(work_dir)
395+
# Only remove work_dir when not resetting all metadata; during a full reset the caller handles cleanup
396+
if not self.resetting_all_metadata_on_repository:
397+
remove_dir(work_dir)
396398

397399
def generate_package_dependency_metadata(self, elem, valid_tool_dependencies_dict, invalid_tool_dependencies_dict):
398400
"""

lib/tool_shed/metadata/repository_metadata_manager.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -799,14 +799,24 @@ def reset_all_metadata_on_repository_in_tool_shed(self, repository_clone_url=Non
799799
metadata_dict = None
800800
ancestor_changeset_revision = None
801801
ancestor_metadata_dict = None
802-
for changeset in self.repository.get_changesets_for_setting_metadata(self.app):
803-
work_dir = tempfile.mkdtemp(prefix="tmp-toolshed-ramorits")
804-
ctx = repo[changeset]
805-
log.debug("Cloning repository changeset revision: %s", str(ctx.rev()))
806-
assert self.repository_clone_url
807-
repository_clone_url = repository_clone_url or self.repository_clone_url
808-
cloned_ok, error_message = hg_util.clone_repository(repository_clone_url, work_dir, str(ctx.rev()))
809-
if cloned_ok:
802+
803+
# Clone repository once, then update for each changeset revision.
804+
work_dir = tempfile.mkdtemp(prefix="tmp-toolshed-ramorits")
805+
assert self.repository_clone_url
806+
repository_clone_url = repository_clone_url or self.repository_clone_url
807+
log.debug("Cloning repository for metadata reset")
808+
cloned_ok, error_message = hg_util.clone_repository(repository_clone_url, work_dir)
809+
if not cloned_ok:
810+
log.error(f"Failed to clone repository: {error_message}")
811+
basic_util.remove_dir(work_dir)
812+
self._clean_repository_metadata(changeset_revisions)
813+
return
814+
815+
try:
816+
for changeset in self.repository.get_changesets_for_setting_metadata(self.app):
817+
ctx = repo[changeset]
818+
log.debug("Updating to changeset revision: %s", str(ctx.rev()))
819+
hg_util.update_repository(work_dir, str(ctx.rev()))
810820
log.debug("Generating metadata for changeset revision: %s", str(ctx.rev()))
811821
self.set_changeset_revision(str(ctx))
812822
self.set_repository_files_dir(work_dir)
@@ -823,11 +833,11 @@ def reset_all_metadata_on_repository_in_tool_shed(self, repository_clone_url=Non
823833
# self.SUBSET - ancestor metadata is a subset of current metadata, so continue from current
824834
# self.NOT_EQUAL_AND_NOT_SUBSET - ancestor metadata is neither equal to nor a subset of current
825835
# metadata, so persist ancestor metadata.
826-
log.info(f"amd {ancestor_metadata_dict}")
836+
log.debug(f"amd {ancestor_metadata_dict}")
827837
comparison = self.compare_changeset_revisions(
828838
ancestor_changeset_revision, ancestor_metadata_dict
829839
)
830-
log.info(f"comparison {comparison}")
840+
log.debug(f"comparison {comparison}")
831841
if comparison in [self.NO_METADATA, self.EQUAL, self.SUBSET]:
832842
ancestor_changeset_revision = self.changeset_revision
833843
ancestor_metadata_dict = self.metadata_dict
@@ -858,6 +868,7 @@ def reset_all_metadata_on_repository_in_tool_shed(self, repository_clone_url=Non
858868
changeset_revisions.append(metadata_changeset_revision)
859869
ancestor_changeset_revision = None
860870
ancestor_metadata_dict = None
871+
finally:
861872
basic_util.remove_dir(work_dir)
862873
# Delete all repository_metadata records for this repository that do not have a changeset_revision
863874
# value in changeset_revisions.

0 commit comments

Comments
 (0)