
Commit 61639d4

ishaan-jaff authored and S1LV3RJ1NX committed

[Docs] Using LiteLLM with vector stores / knowledge bases (#10534)

* docs vector store registry
* img for kbs
* docs kb with litellm
* fix code qa
1 parent 698d449 commit 61639d4

File tree

9 files changed (+188 / -35 lines changed)


docs/my-website/docs/completion/knowledgebase.md

Lines changed: 169 additions & 12 deletions
@@ -1,31 +1,74 @@
-# Using Vector Stores (Knowledge Bases) with LiteLLM
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import Image from '@theme/IdealImage';
+
+# Using Vector Stores (Knowledge Bases)
+
+<Image
+  img={require('../../img/kb.png')}
+  style={{width: '100%', display: 'block', margin: '2rem auto'}}
+/>
+<p style={{textAlign: 'left', color: '#666'}}>
+  Use Vector Stores with any LiteLLM supported model
+</p>
 
-LiteLLM integrates with AWS Bedrock Knowledge Bases, allowing your models to access your organization's data for more accurate and contextually relevant responses.
+LiteLLM integrates with vector stores, allowing your models to access your organization's data for more accurate and contextually relevant responses.
+
+## Supported Vector Stores
+- [Bedrock Knowledge Bases](https://aws.amazon.com/bedrock/knowledge-bases/)
 
 ## Quick Start
 
-In order to use a Bedrock Knowledge Base with LiteLLM, you need to pass `vector_store_ids` as a parameter to the completion request. Where `vector_store_ids` is a list of Bedrock Knowledge Base IDs.
+To use a vector store with LiteLLM, you need to:
+
+- Initialize `litellm.vector_store_registry`
+- Pass tools with `vector_store_ids` to the completion request, where `vector_store_ids` is a list of vector store IDs you initialized in `litellm.vector_store_registry`
 
 ### LiteLLM Python SDK
 
+LiteLLM lets you use vector stores through the [OpenAI API spec](https://platform.openai.com/docs/api-reference/chat/create) by passing a `file_search` tool with the `vector_store_ids` you want to use.
+
 ```python showLineNumbers title="Basic Bedrock Knowledge Base Usage"
 import os
 import litellm
 
+from litellm.vector_stores.vector_store_registry import VectorStoreRegistry, LiteLLM_ManagedVectorStore
+
+# Init vector store registry
+litellm.vector_store_registry = VectorStoreRegistry(
+    vector_stores=[
+        LiteLLM_ManagedVectorStore(
+            vector_store_id="T37J8R4WTM",
+            custom_llm_provider="bedrock"
+        )
+    ]
+)
+
 
 # Make a completion request with vector_store_ids parameter
 response = await litellm.acompletion(
     model="anthropic/claude-3-5-sonnet",
     messages=[{"role": "user", "content": "What is litellm?"}],
-    vector_store_ids=["YOUR_KNOWLEDGE_BASE_ID"]  # e.g., "T37J8R4WTM"
+    tools=[
+        {
+            "type": "file_search",
+            "vector_store_ids": ["T37J8R4WTM"]
+        }
+    ],
 )
 
 print(response.choices[0].message.content)
 ```
 
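Note: the SDK snippet above calls `await litellm.acompletion(...)` at the top level, which only runs as-is inside an async context such as a notebook. In a plain script you would wrap it in an async function; a minimal sketch, assuming the registry was initialized as shown above:

```python
import asyncio
import litellm


async def main() -> None:
    # Assumes litellm.vector_store_registry was initialized as in the snippet above
    response = await litellm.acompletion(
        model="anthropic/claude-3-5-sonnet",
        messages=[{"role": "user", "content": "What is litellm?"}],
        tools=[{"type": "file_search", "vector_store_ids": ["T37J8R4WTM"]}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())
```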
 ### LiteLLM Proxy
 
-#### 1. Configure your proxy
+#### 1. Configure your vector_store_registry
+
+To use a vector store with LiteLLM, you need to configure your `vector_store_registry`. This tells LiteLLM which vector stores to use and which API provider to use for each vector store.
+
+<Tabs>
+<TabItem value="config-yaml" label="config.yaml">
 
 ```yaml showLineNumbers title="config.yaml"
 model_list:
@@ -34,12 +77,35 @@ model_list:
       model: anthropic/claude-3-5-sonnet
       api_key: os.environ/ANTHROPIC_API_KEY
 
+vector_store_registry:
+  - vector_store_name: "bedrock-litellm-website-knowledgebase"
+    litellm_params:
+      vector_store_id: "T37J8R4WTM"
+      custom_llm_provider: "bedrock"
+      vector_store_description: "Bedrock vector store for the Litellm website knowledgebase"
+      vector_store_metadata:
+        source: "https://www.litellm.com/docs"
+
 ```
 
-#### 2. Make a request with vector_store_ids parameter
+</TabItem>
+
+<TabItem value="litellm-ui" label="LiteLLM UI">
+
+On the LiteLLM UI, navigate to Experimental > Vector Stores > Create Vector Store. On this page you can create a vector store with a name, vector store ID, and credentials.
+<Image
+  img={require('../../img/kb_2.png')}
+  style={{width: '50%'}}
+/>
+
+
 
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
+
+</TabItem>
+
+</Tabs>
+
+#### 2. Make a request with vector_store_ids parameter
 
 <Tabs>
 <TabItem value="curl" label="Curl">
@@ -51,7 +117,12 @@ curl http://localhost:4000/v1/chat/completions \
   -d '{
     "model": "claude-3-5-sonnet",
     "messages": [{"role": "user", "content": "What is litellm?"}],
-    "vector_store_ids": ["YOUR_KNOWLEDGE_BASE_ID"]
+    "tools": [
+      {
+        "type": "file_search",
+        "vector_store_ids": ["T37J8R4WTM"]
+      }
+    ]
   }'
 ```
 
@@ -72,7 +143,12 @@ client = OpenAI(
 response = client.chat.completions.create(
     model="claude-3-5-sonnet",
     messages=[{"role": "user", "content": "What is litellm?"}],
-    extra_body={"vector_store_ids": ["YOUR_KNOWLEDGE_BASE_ID"]}
+    tools=[
+        {
+            "type": "file_search",
+            "vector_store_ids": ["T37J8R4WTM"]
+        }
+    ]
 )
 
 print(response.choices[0].message.content)
@@ -81,6 +157,87 @@ print(response.choices[0].message.content)
 </TabItem>
 </Tabs>
 
+
+
+
+## Advanced
+
+### Logging Vector Store Usage
+
+LiteLLM allows you to view your vector store usage in the LiteLLM UI on the `Logs` page.
+
+After completing a request with a vector store, navigate to the `Logs` page on LiteLLM. Here you should be able to see the query sent to the vector store and the corresponding response with scores.
+
+<Image
+  img={require('../../img/kb_4.png')}
+  style={{width: '80%'}}
+/>
+<p style={{textAlign: 'left', color: '#666'}}>
+  LiteLLM Logs Page: Vector Store Usage
+</p>
+
+
+### Listing available vector stores
+
+You can list all available vector stores using the `/vector_store/list` endpoint.
+
+**Request:**
+```bash showLineNumbers title="List all available vector stores"
+curl -X GET "http://localhost:4000/vector_store/list" \
+  -H "Authorization: Bearer $LITELLM_API_KEY"
+```
+
+**Response:**
+
+The response will be a list of all vector stores that are available to use with LiteLLM.
+
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "vector_store_id": "T37J8R4WTM",
+      "custom_llm_provider": "bedrock",
+      "vector_store_name": "bedrock-litellm-website-knowledgebase",
+      "vector_store_description": "Bedrock vector store for the Litellm website knowledgebase",
+      "vector_store_metadata": {
+        "source": "https://www.litellm.com/docs"
+      },
+      "created_at": "2023-05-03T18:21:36.462Z",
+      "updated_at": "2023-05-03T18:21:36.462Z",
+      "litellm_credential_name": "bedrock_credentials"
+    }
+  ],
+  "total_count": 1,
+  "current_page": 1,
+  "total_pages": 1
+}
+```
+
+
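The same listing can also be scripted; a minimal sketch using `requests`, assuming the proxy is running on `localhost:4000` and `LITELLM_API_KEY` holds a valid key:

```python
import os

import requests

# List the vector stores registered on the proxy (same endpoint as the curl above)
resp = requests.get(
    "http://localhost:4000/vector_store/list",
    headers={"Authorization": f"Bearer {os.environ['LITELLM_API_KEY']}"},
    timeout=30,
)
resp.raise_for_status()

for store in resp.json()["data"]:
    print(store["vector_store_id"], store.get("vector_store_name"))
```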
+### Always on for a model
+
+**Use this if you want vector stores to be used by default for a specific model.**
+
+In this config, we add `vector_store_ids` to the `claude-3-5-sonnet-with-vector-store` model. Any request to this model will always use the vector store with the ID `T37J8R4WTM` defined in the `vector_store_registry`.
+
+```yaml showLineNumbers title="Always on for a model"
+model_list:
+  - model_name: claude-3-5-sonnet-with-vector-store
+    litellm_params:
+      model: anthropic/claude-3-5-sonnet
+      vector_store_ids: ["T37J8R4WTM"]
+
+vector_store_registry:
+  - vector_store_name: "bedrock-litellm-website-knowledgebase"
+    litellm_params:
+      vector_store_id: "T37J8R4WTM"
+      custom_llm_provider: "bedrock"
+      vector_store_description: "Bedrock vector store for the Litellm website knowledgebase"
+      vector_store_metadata:
+        source: "https://www.litellm.com/docs"
+```
+
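With the always-on config above, callers do not need to pass the `file_search` tool themselves; a minimal sketch against the proxy, assuming it runs on `localhost:4000` and `sk-1234` is a valid proxy key:

```python
from openai import OpenAI

# Point the OpenAI client at the LiteLLM proxy
client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# No tools/vector_store_ids in the request: the model entry already pins "T37J8R4WTM"
response = client.chat.completions.create(
    model="claude-3-5-sonnet-with-vector-store",
    messages=[{"role": "user", "content": "What is litellm?"}],
)
print(response.choices[0].message.content)
```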
 ## How It Works
 
 LiteLLM implements a `BedrockKnowledgeBaseHook` that intercepts your completion requests for handling the integration with Bedrock Knowledge Bases.
@@ -91,7 +248,7 @@ LiteLLM implements a `BedrockKnowledgeBaseHook` that intercepts your completion
 - Adds the retrieved context to your conversation
 - Sends the augmented messages to the model
 
-### Example Transformation
+#### Example Transformation
 
 When you pass `vector_store_ids=["YOUR_KNOWLEDGE_BASE_ID"]`, your request flows through these steps:
 
@@ -137,4 +294,4 @@ When using the Knowledge Base integration with LiteLLM, you can include the foll
 
 | Parameter | Type | Description |
 |-----------|------|-------------|
-| `vector_store_ids` | List[str] | List of Bedrock Knowledge Base IDs to query |
+| `vector_store_ids` | List[str] | List of Knowledge Base IDs to query |

docs/my-website/img/kb.png (668 KB)

docs/my-website/img/kb_2.png (126 KB)

docs/my-website/img/kb_3.png (249 KB)

docs/my-website/img/kb_4.png (1.14 MB)

enterprise/proxy/vector_stores/endpoints.py

Lines changed: 5 additions & 1 deletion

@@ -70,12 +70,16 @@ async def new_vector_store(
         vector_store.get("vector_store_metadata")
     )
 
-    new_vector_store = (
+    _new_vector_store = (
         await prisma_client.db.litellm_managedvectorstorestable.create(
             data=vector_store
         )
     )
 
+    new_vector_store: LiteLLM_ManagedVectorStore = LiteLLM_ManagedVectorStore(
+        _new_vector_store.model_dump()
+    )
+
     # Add vector store to registry
     if litellm.vector_store_registry is not None:
         litellm.vector_store_registry.add_vector_store_to_registry(
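The change above stops shadowing the Prisma result and registers a typed object instead; roughly the same pattern in isolation (a sketch, with a hypothetical dict standing in for the database record):

```python
import litellm
from litellm.vector_stores.vector_store_registry import (
    LiteLLM_ManagedVectorStore,
    VectorStoreRegistry,
)

if litellm.vector_store_registry is None:
    litellm.vector_store_registry = VectorStoreRegistry()

# Hypothetical stand-in for the record returned by the database layer
record = {"vector_store_id": "T37J8R4WTM", "custom_llm_provider": "bedrock"}

# Build the typed object first, then add it to the global registry
new_vector_store = LiteLLM_ManagedVectorStore(**record)
litellm.vector_store_registry.add_vector_store_to_registry(new_vector_store)
```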

litellm/proxy/proxy_config.yaml

Lines changed: 9 additions & 17 deletions

@@ -3,23 +3,15 @@ model_list:
     litellm_params:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
-  - model_name: claude-3-5-sonnet-with-vector-store
-    litellm_params:
-      model: anthropic/claude-3-5-sonnet-latest
-      api_key: os.environ/ANTHROPIC_API_KEY
-      vector_store_ids: ["T37J8R4WTM"]
 
-# vector_stores:
-#   - vector_store_name: "bedrock-litellm-website-knowledgebase"
-#     litellm_params:
-#       custom_llm_provider: "bedrock"
-#       vector_store_id: "T37J8R4WTM"
-#       vector_store_description: "Bedrock vector store for the Litellm website knowledgebase"
-#       vector_store_metadata:
-#         source: "https://www.litellm.com/docs"
-
+vector_store_registry:
+  - vector_store_name: "bedrock-litellm-website-knowledgebase"
+    litellm_params:
+      vector_store_id: "T37J8R4WTM"
+      custom_llm_provider: "bedrock"
+      vector_store_description: "Bedrock vector store for the Litellm website knowledgebase"
+      vector_store_metadata:
+        source: "https://www.litellm.com/docs"
 
 general_settings:
-  alerting: ["webhook"]
-
-
+  store_prompts_in_spend_logs: true

litellm/proxy/proxy_server.py

Lines changed: 3 additions & 3 deletions

@@ -2026,16 +2026,16 @@ def _init_non_llm_configs(self, config: dict):
             global_mcp_server_manager.load_servers_from_config(mcp_servers_config)
 
         ## VECTOR STORES
-        vector_stores_config = config.get("vector_stores", None)
-        if vector_stores_config:
+        vector_store_registry_config = config.get("vector_store_registry", None)
+        if vector_store_registry_config:
             from litellm.vector_stores.vector_store_registry import VectorStoreRegistry
 
             if litellm.vector_store_registry is None:
                 litellm.vector_store_registry = VectorStoreRegistry()
 
             # Load vector stores from config
             litellm.vector_store_registry.load_vector_stores_from_config(
-                vector_stores_config
+                vector_store_registry_config
             )
         pass
 
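For reference, the renamed `vector_store_registry` config key feeds the same loader that the SDK exposes; a minimal sketch of loading an equivalent list of entries programmatically (the dict below mirrors the config.yaml entries above and is illustrative only):

```python
import litellm
from litellm.vector_stores.vector_store_registry import VectorStoreRegistry

# Same shape as the `vector_store_registry:` block in the proxy config
vector_store_registry_config = [
    {
        "vector_store_name": "bedrock-litellm-website-knowledgebase",
        "litellm_params": {
            "vector_store_id": "T37J8R4WTM",
            "custom_llm_provider": "bedrock",
        },
    }
]

if litellm.vector_store_registry is None:
    litellm.vector_store_registry = VectorStoreRegistry()

litellm.vector_store_registry.load_vector_stores_from_config(
    vector_store_registry_config
)
```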

litellm/vector_stores/vector_store_registry.py

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 # litellm/proxy/vector_stores/vector_store_registry.py
 import json
 from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 from litellm._logging import verbose_logger
 from litellm.types.vector_stores import (
@@ -172,7 +172,7 @@ def add_vector_store_to_registry(self, vector_store: LiteLLM_ManagedVectorStore)
         for vector_store in self.vector_stores:
             if vector_store.get("vector_store_id") == vector_store_id:
                 return
-        self.vector_stores.append(cast(LiteLLM_ManagedVectorStore, vector_store))
+        self.vector_stores.append(vector_store)
 
     def delete_vector_store_from_registry(self, vector_store_id: str):
         """

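Dropping the `cast` does not change behavior: `add_vector_store_to_registry` still skips entries whose `vector_store_id` is already registered. A small sketch of that de-duplication, assuming the types used elsewhere in this commit:

```python
from litellm.vector_stores.vector_store_registry import (
    LiteLLM_ManagedVectorStore,
    VectorStoreRegistry,
)

registry = VectorStoreRegistry()
store = LiteLLM_ManagedVectorStore(
    vector_store_id="T37J8R4WTM",
    custom_llm_provider="bedrock",
)

# The second call is a no-op because the id is already in the registry
registry.add_vector_store_to_registry(store)
registry.add_vector_store_to_registry(store)

print(len(registry.vector_stores))  # expected: 1
```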