Commit ebf6395

[Feat] Add Eleven Labs - Speech To Text Support on LiteLLM (#12119)

* add ELEVENLABS as a provider
* add deepgram to main.py
* add ElevenLabsException
* add ElevenLabsAudioTranscriptionConfig
* add transform_audio_transcription_response
* TestElevenLabsAudioTranscription
* add elevenlabs/scribe_v1 to model cost map
* add ElevenLabsAudioTranscriptionConfig
* add AudioTranscriptionRequestData
* add ElevenLabs transform
* use AudioTranscriptionRequestData
* refactoring fixes
* add ProcessedAudioFile util for reading audio files
* test_elevenlabs_diarize_parameter_passthrough
* docs eleven labs
* docs fixes
* fix code qa checks
* fixes - audio transcription
* ui - add ElevenLabs logo
* add elevenlabs logo
* docs - ElevenLabs
* test fix elevenlabs

1 parent 041db02 commit ebf6395

24 files changed: +1109 −131 lines changed

Lines changed: 231 additions & 0 deletions

@@ -0,0 +1,231 @@

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ElevenLabs

ElevenLabs provides high-quality AI voice technology, including speech-to-text through its transcription API.

| Property | Details |
|----------|---------|
| Description | ElevenLabs offers advanced AI voice technology with speech-to-text transcription that supports multiple languages and speaker diarization. |
| Provider Route on LiteLLM | `elevenlabs/` |
| Provider Doc | [ElevenLabs API ↗](https://elevenlabs.io/docs/api-reference) |
| Supported Endpoints | `/audio/transcriptions` |

## Quick Start

### LiteLLM Python SDK

<Tabs>
<TabItem value="basic" label="Basic Usage">

```python showLineNumbers title="Basic audio transcription with ElevenLabs"
import litellm

# Transcribe audio file
with open("audio.mp3", "rb") as audio_file:
    response = litellm.transcription(
        model="elevenlabs/scribe_v1",
        file=audio_file,
        api_key="your-elevenlabs-api-key"  # or set ELEVENLABS_API_KEY env var
    )

print(response.text)
```

</TabItem>

<TabItem value="advanced" label="Advanced Features">

```python showLineNumbers title="Audio transcription with advanced features"
import litellm

# Transcribe with speaker diarization and a language hint
with open("audio.wav", "rb") as audio_file:
    response = litellm.transcription(
        model="elevenlabs/scribe_v1",
        file=audio_file,
        language="en",    # Language hint (maps to language_code)
        temperature=0.3,  # Control randomness in transcription
        diarize=True,     # Enable speaker diarization
        api_key="your-elevenlabs-api-key"
    )

print(f"Transcription: {response.text}")
print(f"Language: {response.language}")

# Access word-level timestamps if available
if hasattr(response, 'words') and response.words:
    for word_info in response.words:
        print(f"Word: {word_info['word']}, Start: {word_info['start']}, End: {word_info['end']}")
```

</TabItem>

<TabItem value="async" label="Async Usage">

```python showLineNumbers title="Async audio transcription"
import litellm
import asyncio

async def transcribe_audio():
    with open("audio.mp3", "rb") as audio_file:
        response = await litellm.atranscription(
            model="elevenlabs/scribe_v1",
            file=audio_file,
            api_key="your-elevenlabs-api-key"
        )
    return response.text

# Run async transcription
result = asyncio.run(transcribe_audio())
print(result)
```

</TabItem>
</Tabs>

### LiteLLM Proxy

#### 1. Configure your proxy

<Tabs>
<TabItem value="config-yaml" label="config.yaml">

```yaml showLineNumbers title="ElevenLabs configuration in config.yaml"
model_list:
  - model_name: elevenlabs-transcription
    litellm_params:
      model: elevenlabs/scribe_v1
      api_key: os.environ/ELEVENLABS_API_KEY

general_settings:
  master_key: your-master-key
```

</TabItem>

<TabItem value="env-vars" label="Environment Variables">

```bash showLineNumbers title="Required environment variables"
export ELEVENLABS_API_KEY="your-elevenlabs-api-key"
export LITELLM_MASTER_KEY="your-master-key"
```

</TabItem>
</Tabs>

#### 2. Start the proxy

```bash showLineNumbers title="Start LiteLLM proxy server"
litellm --config config.yaml

# Proxy will be available at http://localhost:4000
```

#### 3. Make transcription requests

<Tabs>
<TabItem value="curl" label="Curl">

```bash showLineNumbers title="Audio transcription with curl"
# Note: don't set Content-Type manually; curl generates the correct
# multipart/form-data header (with boundary) for -F requests
curl http://localhost:4000/v1/audio/transcriptions \
  -H "Authorization: Bearer $LITELLM_API_KEY" \
  -F file="@audio.mp3" \
  -F model="elevenlabs-transcription" \
  -F language="en" \
  -F temperature="0.3"
```

</TabItem>

<TabItem value="openai-sdk" label="OpenAI Python SDK">

```python showLineNumbers title="Using OpenAI SDK with LiteLLM proxy"
from openai import OpenAI

# Initialize client with your LiteLLM proxy URL
client = OpenAI(
    base_url="http://localhost:4000",
    api_key="your-litellm-api-key"
)

# Transcribe audio file
with open("audio.mp3", "rb") as audio_file:
    response = client.audio.transcriptions.create(
        model="elevenlabs-transcription",
        file=audio_file,
        language="en",
        temperature=0.3,
        # ElevenLabs-specific parameters go through extra_body, since the
        # OpenAI SDK rejects unknown keyword arguments
        extra_body={
            "diarize": True,
            "speaker_boost": True,
            "custom_vocabulary": "technical,AI,machine learning"
        }
    )

print(response.text)
```

</TabItem>

<TabItem value="javascript" label="JavaScript/Node.js">

```javascript showLineNumbers title="Audio transcription with JavaScript"
import OpenAI from 'openai';
import fs from 'fs';

const openai = new OpenAI({
  baseURL: 'http://localhost:4000',
  apiKey: 'your-litellm-api-key'
});

async function transcribeAudio() {
  const response = await openai.audio.transcriptions.create({
    file: fs.createReadStream('audio.mp3'),
    model: 'elevenlabs-transcription',
    language: 'en',
    temperature: 0.3,
    diarize: true,
    speaker_boost: true
  });

  console.log(response.text);
}

transcribeAudio();
```

</TabItem>
</Tabs>

## Response Format

ElevenLabs returns transcription responses in OpenAI-compatible format:

```json showLineNumbers title="Example transcription response"
{
  "text": "Hello, this is a sample transcription with multiple speakers.",
  "task": "transcribe",
  "language": "en",
  "words": [
    {
      "word": "Hello",
      "start": 0.0,
      "end": 0.5
    },
    {
      "word": "this",
      "start": 0.5,
      "end": 0.8
    }
  ]
}
```

### Common Issues

1. **Invalid API Key**: Ensure `ELEVENLABS_API_KEY` is set correctly
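For the invalid-key case, a fail-fast self-check avoids a wasted round-trip to the API. The helper below is a hypothetical snippet (not part of LiteLLM) that raises a descriptive error when the variable is missing or blank:

```python
import os

def require_elevenlabs_key(env=os.environ) -> str:
    """Return the ElevenLabs API key, or raise a descriptive error."""
    key = env.get("ELEVENLABS_API_KEY", "").strip()
    if not key:
        raise RuntimeError(
            "ELEVENLABS_API_KEY is missing or empty; "
            "export it before calling litellm.transcription"
        )
    return key

# Demonstrate with a stand-in environment dict:
print(require_elevenlabs_key({"ELEVENLABS_API_KEY": "sk-demo"}))  # → sk-demo
```

Call it once at startup so a misconfigured deployment fails immediately with a clear message rather than with a provider-side 401.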
docs/my-website/sidebars.js

Lines changed: 1 addition & 0 deletions

```diff
@@ -415,6 +415,7 @@ const sidebars = {
         "providers/groq",
         "providers/github",
         "providers/deepseek",
+        "providers/elevenlabs",
         "providers/fireworks_ai",
         "providers/clarifai",
         "providers/vllm",
```
litellm/__init__.py

Lines changed: 5 additions & 0 deletions

```diff
@@ -478,6 +478,7 @@ def identify(event_details):
 nebius_models: List = []
 nebius_embedding_models: List = []
 deepgram_models: List = []
+elevenlabs_models: List = []


 def is_bedrock_pricing_only_model(key: str) -> bool:
@@ -651,6 +652,8 @@ def add_known_models():
             featherless_ai_models.append(key)
         elif value.get("litellm_provider") == "deepgram":
             deepgram_models.append(key)
+        elif value.get("litellm_provider") == "elevenlabs":
+            elevenlabs_models.append(key)


 add_known_models()
@@ -733,6 +736,7 @@ def add_known_models():
     + featherless_ai_models
     + nscale_models
     + deepgram_models
+    + elevenlabs_models
 )

 model_list_set = set(model_list)
@@ -797,6 +801,7 @@ def add_known_models():
     "nscale": nscale_models,
     "featherless_ai": featherless_ai_models,
     "deepgram": deepgram_models,
+    "elevenlabs": elevenlabs_models,
 }

 # mapping for those models which have larger equivalents
```
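The registration flow above can be illustrated with a condensed, standalone sketch: `add_known_models` walks the model cost map and buckets each entry into a per-provider list keyed on `litellm_provider`. The cost-map entries below are made-up stand-ins (only `elevenlabs/scribe_v1` comes from this commit), not the real map:

```python
# Minimal sketch of the provider-bucketing logic, not LiteLLM's actual code.
model_cost = {
    "elevenlabs/scribe_v1": {"litellm_provider": "elevenlabs", "mode": "audio_transcription"},
    "deepgram/example-model": {"litellm_provider": "deepgram", "mode": "audio_transcription"},
}

deepgram_models: list = []
elevenlabs_models: list = []

# Each cost-map key whose provider matches is appended to that provider's list
for key, value in model_cost.items():
    if value.get("litellm_provider") == "deepgram":
        deepgram_models.append(key)
    elif value.get("litellm_provider") == "elevenlabs":
        elevenlabs_models.append(key)

print(elevenlabs_models)  # → ['elevenlabs/scribe_v1']
```

This is why adding a provider touches three places in `__init__.py`: the empty list, the `elif` branch that fills it, and the aggregate `model_list` / provider mapping that exposes it.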

litellm/litellm_core_utils/audio_utils/utils.py

Lines changed: 100 additions & 0 deletions

```diff
@@ -3,10 +3,110 @@
 """

 import os
+from dataclasses import dataclass

+from litellm.types.files import get_file_mime_type_from_extension
 from litellm.types.utils import FileTypes


+@dataclass
+class ProcessedAudioFile:
+    """
+    Processed audio file data.
+
+    Attributes:
+        file_content: The binary content of the audio file
+        filename: The filename (extracted or generated)
+        content_type: The MIME type of the audio file
+    """
+    file_content: bytes
+    filename: str
+    content_type: str
+
+
+def process_audio_file(audio_file: FileTypes) -> ProcessedAudioFile:
+    """
+    Common utility function to process audio files for audio transcription APIs.
+
+    Handles various input types:
+    - File paths (str, os.PathLike)
+    - Raw bytes/bytearray
+    - Tuples (filename, content, optional content_type)
+    - File-like objects with read() method
+
+    Args:
+        audio_file: The audio file input in various formats
+
+    Returns:
+        ProcessedAudioFile: Structured data with file content, filename, and content type
+
+    Raises:
+        ValueError: If audio_file type is unsupported or content cannot be extracted
+    """
+    file_content = None
+    filename = None
+
+    if isinstance(audio_file, (bytes, bytearray)):
+        # Raw bytes
+        filename = 'audio.wav'
+        file_content = bytes(audio_file)
+    elif isinstance(audio_file, (str, os.PathLike)):
+        # File path or PathLike
+        file_path = str(audio_file)
+        with open(file_path, 'rb') as f:
+            file_content = f.read()
+        filename = file_path.split('/')[-1]
+    elif isinstance(audio_file, tuple):
+        # Tuple format: (filename, content, content_type) or (filename, content)
+        if len(audio_file) >= 2:
+            filename = audio_file[0] or 'audio.wav'
+            content = audio_file[1]
+            if isinstance(content, (bytes, bytearray)):
+                file_content = bytes(content)
+            elif isinstance(content, (str, os.PathLike)):
+                # File path or PathLike
+                with open(str(content), 'rb') as f:
+                    file_content = f.read()
+            elif hasattr(content, 'read'):
+                # File-like object
+                file_content = content.read()
+                if hasattr(content, 'seek'):
+                    content.seek(0)
+            else:
+                raise ValueError(f"Unsupported content type in tuple: {type(content)}")
+        else:
+            raise ValueError("Tuple must have at least 2 elements: (filename, content)")
+    elif hasattr(audio_file, 'read') and not isinstance(audio_file, (str, bytes, bytearray, tuple, os.PathLike)):
+        # File-like object (IO) - check this after all other types
+        filename = getattr(audio_file, 'name', 'audio.wav')
+        file_content = audio_file.read()  # type: ignore
+        # Reset file pointer if possible
+        if hasattr(audio_file, 'seek'):
+            audio_file.seek(0)  # type: ignore
+    else:
+        raise ValueError(f"Unsupported audio_file type: {type(audio_file)}")
+
+    if file_content is None:
+        raise ValueError("Could not extract file content from audio_file")
+
+    # Determine content type using LiteLLM's file type utilities
+    content_type = 'audio/wav'  # Default fallback
+    if filename:
+        try:
+            # Extract extension from filename
+            extension = filename.split('.')[-1].lower() if '.' in filename else 'wav'
+            content_type = get_file_mime_type_from_extension(extension)
+        except ValueError:
+            # If extension is not recognized, fallback to audio/wav
+            content_type = 'audio/wav'
+
+    return ProcessedAudioFile(
+        file_content=file_content,
+        filename=filename,
+        content_type=content_type
+    )
+
+
 def get_audio_file_name(file_obj: FileTypes) -> str:
     """
     Safely get the name of a file-like object or return its string representation.
```