Skip to content

Commit c122f10

Browse files
committed
internal tagging
1 parent af54791 commit c122f10

File tree

2 files changed

+40
-12
lines changed

2 files changed

+40
-12
lines changed

nomic/data_operations.py

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import io
33
import json
4+
import time
45
from collections import defaultdict
56
from datetime import datetime
67
from pathlib import Path
@@ -679,11 +680,14 @@ def __init__(self, projection: "AtlasProjection", auto_cleanup: Optional[bool] =
679680
self.auto_cleanup = auto_cleanup
680681

681682
@property
682-
def df(self, overwrite: bool = False) -> pd.DataFrame:
683+
def df(self, overwrite: bool = False, wait_time: int = 120) -> pd.DataFrame:
683684
"""
684685
Pandas DataFrame mapping each data point to its tags.
686+
687+
Args:
688+
wait_time: The maximum time, in seconds, to wait while fetching a tag.
685689
"""
686-
tags = self.get_tags()
690+
tags = self.get_tags(wait_time=wait_time)
687691
tag_definition_ids = [tag["tag_definition_id"] for tag in tags]
688692
if self.auto_cleanup:
689693
self._remove_outdated_tag_files(tag_definition_ids)
@@ -707,12 +711,26 @@ def df(self, overwrite: bool = False) -> pd.DataFrame:
707711
tb = tb.append_column(tag["tag_name"], bitmask)
708712
tbs.append(tb)
709713
return pa.concat_tables(tbs).to_pandas()
714+
715+
def is_tag_complete(self, tag_id) -> bool:
    """
    Asks the tag status endpoint whether a tag has finished processing.

    Args:
        tag_id: The id of the tag whose status is requested.

    Returns:
        The value of the "is_complete" field in the endpoint's JSON response.
    """
    status_url = self.dataset.atlas_api_path + "/v1/project/projection/tags/status"
    auth_headers = self.dataset.header
    query = {
        "project_id": self.dataset.id,
        "tag_id": tag_id,
    }
    response = requests.get(status_url, headers=auth_headers, params=query)
    return response.json()["is_complete"]
710725

711-
def get_tags(self) -> List[Dict[str, str]]:
726+
def get_tags(self, wait_time: int = 120) -> List[Dict[str, str]]:
712727
"""
713728
Retrieves all tags made in the web browser for a specific map.
714729
Each tag is a dictionary containing tag_name, tag_id, and metadata.
715730
731+
Args:
732+
wait_time: The maximum time, in seconds, to wait for a tag to be completed.
733+
716734
Returns:
717735
A list of tags a user has created for the projection.
718736
"""
@@ -723,16 +741,26 @@ def get_tags(self) -> List[Dict[str, str]]:
723741
).json()
724742
keep_tags = []
725743
for tag in tags:
726-
is_complete = requests.get(
727-
self.dataset.atlas_api_path + "/v1/project/projection/tags/status",
728-
headers=self.dataset.header,
729-
params={
730-
"project_id": self.dataset.id,
731-
"tag_id": tag["tag_id"],
732-
},
733-
).json()["is_complete"]
744+
tag_id = tag["tag_id"]
745+
is_complete = self.is_tag_complete(tag_id)
734746
if is_complete:
735747
keep_tags.append(tag)
748+
else:
749+
# Use robotag route instead of v1/n so we guarantee only one request gets launched
750+
requests.post(self.dataset.atlas_api_path + "/v1/project/projection/tags/robotag", headers=self.dataset.header,
751+
json={"project_id": self.dataset.id, "tag_id": tag_id})
752+
wait_start = time.time()
753+
# Wait up to wait_time seconds for the tag to be completed
754+
while not is_complete:
755+
# Sleep 15 seconds between status checks
756+
time.sleep(15)
757+
if time.time() >= wait_start + wait_time:
758+
break
759+
is_complete = self.is_tag_complete(tag_id)
760+
if is_complete:
761+
keep_tags.append(tag)
762+
else:
763+
logger.warning(f"Tag {tag['tag_name']} currently unavailable for download from SDK. Download from {self.projection.dataset_link} instead or try again.")
736764
return keep_tags
737765

738766
def get_datums_in_tag(self, tag_name: str, overwrite: bool = False):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
setup(
2525
name="nomic",
26-
version="3.4.1",
26+
version="3.4.2",
2727
url="https://github.com/nomic-ai/nomic",
2828
description=description,
2929
long_description=long_description,

0 commit comments

Comments
 (0)