Skip to content

Commit e2bd78e

Browse files
authored
feat: Qdrant vectorstore support (#260)
* feat: Qdrant vectorstore support * chore: review changes * docs: Updated README.md
1 parent cbe45a4 commit e2bd78e

File tree

9 files changed

+208
-2
lines changed

9 files changed

+208
-2
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ starting point.
189189
KH_DOCSTORE=(Elasticsearch | LanceDB | SimpleFileDocumentStore)
190190
191191
# setup your preferred vectorstore (for vector-based search)
192-
KH_VECTORSTORE=(ChromaDB | LanceDB | InMemory)
192+
KH_VECTORSTORE=(ChromaDB | LanceDB | InMemory | Qdrant)
193193
194194
# Enable / disable multimodal QA
195195
KH_REASONINGS_USE_MULTIMODAL=True

flowsettings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
# "__type__": "kotaemon.storages.LanceDBVectorStore",
8282
"__type__": "kotaemon.storages.ChromaVectorStore",
8383
# "__type__": "kotaemon.storages.MilvusVectorStore",
84+
# "__type__": "kotaemon.storages.QdrantVectorStore",
8485
"path": str(KH_USER_DATA_DIR / "vectorstore"),
8586
}
8687
KH_LLMS = {}

libs/kotaemon/kotaemon/storages/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
InMemoryVectorStore,
1212
LanceDBVectorStore,
1313
MilvusVectorStore,
14+
QdrantVectorStore,
1415
SimpleFileVectorStore,
1516
)
1617

@@ -28,4 +29,5 @@
2829
"SimpleFileVectorStore",
2930
"LanceDBVectorStore",
3031
"MilvusVectorStore",
32+
"QdrantVectorStore",
3133
]

libs/kotaemon/kotaemon/storages/vectorstores/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .in_memory import InMemoryVectorStore
44
from .lancedb import LanceDBVectorStore
55
from .milvus import MilvusVectorStore
6+
from .qdrant import QdrantVectorStore
67
from .simple_file import SimpleFileVectorStore
78

89
__all__ = [
@@ -12,4 +13,5 @@
1213
"SimpleFileVectorStore",
1314
"LanceDBVectorStore",
1415
"MilvusVectorStore",
16+
"QdrantVectorStore",
1517
]
libs/kotaemon/kotaemon/storages/vectorstores/qdrant.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from typing import Any, List, Optional, Type, cast
2+
3+
from llama_index.vector_stores.qdrant import QdrantVectorStore as LIQdrantVectorStore
4+
5+
from .base import LlamaIndexVectorStore
6+
7+
8+
class QdrantVectorStore(LlamaIndexVectorStore):
    """Vector store that keeps embeddings in a Qdrant collection.

    Builds on llama-index's ``QdrantVectorStore`` (imported in this module as
    ``LIQdrantVectorStore``) and adds the collection-level helpers ``delete``,
    ``drop`` and ``count``, which talk to the Qdrant client directly.

    NOTE(review): this module imports ``llama_index.vector_stores.qdrant`` at
    import time, while ``llama-index-vector-stores-qdrant`` is only listed under
    the optional ``adv`` extras -- confirm that importing the storages package
    without that extra is acceptable.

    Args:
        collection_name: name of the Qdrant collection used by ``delete``,
            ``drop`` and ``count``; also forwarded to the parent constructor.
        url: forwarded unchanged to the parent constructor.
        api_key: forwarded unchanged to the parent constructor.
        client_kwargs: forwarded unchanged to the parent constructor.
        **kwargs: any further keyword arguments, forwarded unchanged to the
            parent constructor.
    """

    # llama-index class this store is built on.
    # NOTE(review): presumably instantiated by LlamaIndexVectorStore.__init__ --
    # confirm in .base
    _li_class: Type[LIQdrantVectorStore] = LIQdrantVectorStore

    def __init__(
        self,
        collection_name,
        url: Optional[str] = None,
        api_key: Optional[str] = None,
        client_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ):
        # Keep every constructor argument so __persist_flow__ can report it.
        self._kwargs = kwargs
        self._client_kwargs = client_kwargs
        self._api_key = api_key
        self._url = url
        self._collection_name = collection_name

        named_args = {
            "collection_name": collection_name,
            "url": url,
            "api_key": api_key,
            "client_kwargs": client_kwargs,
        }
        super().__init__(**named_args, **kwargs)

        # Typing aid only: cast() returns its argument unchanged at runtime.
        self._client = cast(LIQdrantVectorStore, self._client)

    def delete(self, ids: List[str], **kwargs):
        """Remove the points with the given ids from the collection.

        Args:
            ids: ids of the embeddings to remove
            kwargs: extra keyword arguments handed to the Qdrant client's
                ``delete`` call as-is
        """
        from qdrant_client import models

        selector = models.PointIdsList(points=ids)
        self._client.client.delete(
            collection_name=self._collection_name,
            points_selector=selector,
            **kwargs,
        )

    def drop(self):
        """Remove the whole collection from Qdrant."""
        qdrant = self._client.client
        qdrant.delete_collection(self._collection_name)

    def count(self) -> int:
        """Return the exact number of points stored in the collection."""
        result = self._client.client.count(
            collection_name=self._collection_name, exact=True
        )
        return result.count

    def __persist_flow__(self):
        """Return the constructor arguments of this store as a dict.

        The named arguments come first, followed by the extra ``**kwargs``
        given at construction time.
        """
        state = {
            "collection_name": self._collection_name,
            "url": self._url,
            "api_key": self._api_key,
            "client_kwargs": self._client_kwargs,
        }
        state.update(self._kwargs)
        return state

libs/kotaemon/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ requires-python = ">= 3.10"
2222
description = "Kotaemon core library for AI development."
2323
dependencies = [
2424
"click>=8.1.7,<9",
25-
"cohere>=5.3.2,<5.4",
25+
"cohere>=5.3.2,<6",
2626
"cookiecutter>=2.6.0,<2.7",
2727
"fast_langdetect",
2828
"gradio>=4.31.0,<4.40",
@@ -73,6 +73,7 @@ adv = [
7373
"sentence-transformers",
7474
"llama-cpp-python<0.2.8",
7575
"fastembed",
76+
"llama-index-vector-stores-qdrant",
7677
]
7778
dev = [
7879
"black",

libs/kotaemon/tests/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ def if_unstructured_not_installed():
5151
return False
5252

5353

54+
def if_cohere_not_installed():
    """Report whether the ``cohere`` package is unavailable.

    Returns:
        True when ``import cohere`` raises ImportError, False otherwise.
    """
    missing = False
    try:
        import cohere  # noqa: F401
    except ImportError:
        missing = True
    return missing
61+
62+
5463
def if_llama_cpp_not_installed():
5564
try:
5665
import llama_cpp # noqa: F401
@@ -76,6 +85,10 @@ def if_llama_cpp_not_installed():
7685
if_unstructured_not_installed(), reason="unstructured is not installed"
7786
)
7887

88+
# Marker for tests that need the `cohere` package: they are skipped when
# importing it fails.
skip_when_cohere_not_installed = pytest.mark.skipif(
    condition=if_cohere_not_installed(),
    reason="cohere is not installed",
)
91+
7992
skip_openai_lc_wrapper_test = pytest.mark.skipif(
8093
True, reason="OpenAI LC wrapper test is skipped"
8194
)

libs/kotaemon/tests/test_embedding_models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
)
1515

1616
from .conftest import (
17+
skip_when_cohere_not_installed,
1718
skip_when_fastembed_not_installed,
1819
skip_when_sentence_bert_not_installed,
1920
)
@@ -132,6 +133,7 @@ def test_lchuggingface_embeddings(
132133
langchain_huggingface_embedding_call.assert_called()
133134

134135

136+
@skip_when_cohere_not_installed
135137
@patch(
136138
"langchain.embeddings.cohere.CohereEmbeddings.embed_documents",
137139
side_effect=lambda *args, **kwargs: [[1.0, 2.1, 3.2]],

libs/kotaemon/tests/test_vectorstore.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import json
22
import os
33

4+
import pytest
5+
46
from kotaemon.base import DocumentWithEmbedding
57
from kotaemon.storages import (
68
ChromaVectorStore,
79
InMemoryVectorStore,
810
MilvusVectorStore,
11+
QdrantVectorStore,
912
SimpleFileVectorStore,
1013
)
1114

@@ -248,3 +251,118 @@ def test_save_load_delete(self, tmp_path):
248251
# reinit the milvus with the same collection name
249252
db2 = MilvusVectorStore(path=str(tmp_path), overwrite=False)
250253
assert db2.count() == 0, "delete collection function does not work correctly"
254+
255+
256+
class TestQdrantVectorStore:
    """Tests for QdrantVectorStore using in-process ``QdrantClient`` instances.

    Each test builds its own client (``":memory:"`` or a temporary path), so
    no external Qdrant service is contacted. ``qdrant_client`` is imported
    inside each test rather than at module level.
    """

    def test_add(self):
        """Adding raw embeddings returns the given ids and stores each entry."""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
        ]

        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert output == ids, "Expected output to be the same as ids"
        assert db.count() == 2, "Expected 2 added entries"

    def test_add_from_docs(self, tmp_path):
        """Adding DocumentWithEmbedding objects stores one entry per document."""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        documents = [
            DocumentWithEmbedding(embedding=embedding, metadata=metadata)
            for embedding, metadata in zip(embeddings, metadatas)
        ]

        output = db.add(documents)
        assert len(output) == 2, "Expected outputting 2 ids"
        assert db.count() == 2, "Expected 2 added entries"

    def test_delete(self, tmp_path):
        """Deleting by id removes exactly the requested entries."""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            "6bed07c3-d284-47a3-a711-c3f9186755b8",
        ]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert db.count() == 3, "Expected 3 added entries"
        db.delete(
            ids=[
                "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
                "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            ]
        )
        assert db.count() == 1, "Expected 1 remaining entry"
        db.delete(ids=["6bed07c3-d284-47a3-a711-c3f9186755b8"])
        assert db.count() == 0, "Expected 0 remaining entry"

    def test_query(self, tmp_path):
        """Querying with a stored embedding returns that entry as the top hit."""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            "6bed07c3-d284-47a3-a711-c3f9186755b8",
        ]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)

        _, sim, out_ids = db.query(embedding=[0.1, 0.2, 0.3], top_k=1)
        # Compare the absolute difference: without abs() the check would pass
        # for any similarity not greater than 1, e.g. 0.0.
        assert abs(sim[0] - 1.0) < 1e-6
        assert out_ids == ["0f0611b3-2d9c-4818-ab69-1f1c4cf66693"]

        _, _, out_ids = db.query(embedding=[0.4, 0.5, 0.6], top_k=1)
        assert out_ids == ["90aba5d3-f4f8-47c6-bad9-5ea457442e07"]

    def test_save_load_delete(self, tmp_path):
        """Test that save/load func behave correctly."""
        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            "6bed07c3-d284-47a3-a711-c3f9186755b8",
        ]
        from qdrant_client import QdrantClient

        # Pass the path as a string, as the Milvus test in this file does.
        storage_path = str(tmp_path)

        db = QdrantVectorStore(
            collection_name="test", client=QdrantClient(path=storage_path)
        )
        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        del db

        # A new store over the same path must see the entries added above.
        db2 = QdrantVectorStore(
            collection_name="test", client=QdrantClient(path=storage_path)
        )
        assert db2.count() == 3

        db2.drop()
        del db2

        db2 = QdrantVectorStore(
            collection_name="test", client=QdrantClient(path=storage_path)
        )

        with pytest.raises(Exception):
            # The collection was dropped and nothing has been added since, so
            # it should not exist and the count function should raise an
            # exception
            db2.count()

0 commit comments

Comments
 (0)