Commit 0ee3e23

feat: add fallbackModel option for automatic model failover on provider errors (#1136)
* feat: add fallbackModel option for automatic model failover on provider errors

  When a primary model returns a retryable HTTP status (429, 500, 502, 503), the SDK now automatically re-issues the request to a user-specified fallback model. This unblocks users hitting transient "high demand" errors on models like Gemini 2.5 Flash Lite by transparently offloading to a backup model (e.g., Gemini 2.5 Pro). The fallback model gets a single attempt to prevent infinite loops and can be from a completely different provider. Adds fallbackModel/fallback_model to GuardOptions, RedactOptions, and ScanOptions in both TypeScript and Python SDKs with full test coverage.

* docs: document fallbackModel option in TypeScript and Python SDK references

  Adds a "Model Fallback" section under Client Configuration explaining the feature with code examples, and adds the fallbackModel/fallback_model field to the guard, redact, and scan options tables in both SDK pages.

* chore: bump TypeScript SDK version to 0.1.7-rc1

* bump sdk versions
1 parent 7017b92 commit 0ee3e23

File tree

12 files changed (+630, -16 lines)


docs/content/docs/sdk/sdk/python.mdx

Lines changed: 27 additions & 0 deletions

@@ -70,6 +70,30 @@ client = create_client(
 
 The fallback URL can also be set via the `SUPERAGENT_FALLBACK_URL` environment variable.
 
+### Model Fallback
+
+When using third-party providers (e.g., Google Gemini), transient errors like 503 (high demand) or 429 (rate limited) can cause requests to fail. The SDK supports automatic model fallback: if the primary model returns a retryable error, the request is re-issued to a backup model you specify.
+
+```python
+result = await client.guard(
+    input="user message to analyze",
+    model="google/gemini-2.5-flash-lite",
+    fallback_model="google/gemini-2.5-pro"
+)
+```
+
+If the primary model succeeds, `fallback_model` is never called. If it returns a retryable status code (429, 500, 502, or 503), the SDK automatically retries with the fallback model. The fallback model can be from a different provider entirely:
+
+```python
+result = await client.guard(
+    input="user message to analyze",
+    model="google/gemini-2.5-flash-lite",
+    fallback_model="openai/gpt-4o-mini"
+)
+```
+
+The `fallback_model` option is available on `guard()`, `redact()`, and `scan()`. The fallback model gets a single attempt — there is no recursive fallback chain.
+
 ---
 
 ## Guard
@@ -92,6 +116,7 @@ if result.classification == "block":
 |--------|------|----------|---------|-------------|
 | `input` | `str \| bytes` | Yes | - | The input to analyze |
 | `model` | `str` | No | `superagent/guard-1.7b` | Model in `provider/model` format |
+| `fallback_model` | `str` | No | - | Backup model used when primary returns 429/500/502/503 |
 | `system_prompt` | `str` | No | - | Custom system prompt |
 | `chunk_size` | `int` | No | `8000` | Characters per chunk (0 to disable) |
@@ -148,6 +173,7 @@ print(result.redacted)
 |--------|------|----------|---------|-------------|
 | `input` | `str` | Yes | - | The text to redact |
 | `model` | `str` | Yes | - | Model in `provider/model` format |
+| `fallback_model` | `str` | No | - | Backup model used when primary returns 429/500/502/503 |
 | `entities` | `list[str]` | No | Default PII | Entity types to redact |
 | `rewrite` | `bool` | No | `False` | Rewrite contextually instead of placeholders |
@@ -216,6 +242,7 @@ print(f"Cost: ${response.usage.cost:.4f}")
 | `repo` | `str` | Yes | - | Git repository URL (https:// or git@) |
 | `branch` | `str` | No | Default branch | Branch, tag, or commit to checkout |
 | `model` | `str` | No | `anthropic/claude-sonnet-4-5` | Model for OpenCode analysis |
+| `fallback_model` | `str` | No | - | Backup model used when primary returns 429/500/502/503 |
 
 ### Response

docs/content/docs/sdk/sdk/typescript.mdx

Lines changed: 27 additions & 0 deletions

@@ -70,6 +70,30 @@ const client = createClient({
 
 The fallback URL can also be set via the `SUPERAGENT_FALLBACK_URL` environment variable.
 
+### Model Fallback
+
+When using third-party providers (e.g., Google Gemini), transient errors like 503 (high demand) or 429 (rate limited) can cause requests to fail. The SDK supports automatic model fallback: if the primary model returns a retryable error, the request is re-issued to a backup model you specify.
+
+```typescript
+const result = await client.guard({
+  input: "user message to analyze",
+  model: "google/gemini-2.5-flash-lite",
+  fallbackModel: "google/gemini-2.5-pro"
+});
+```
+
+If the primary model succeeds, `fallbackModel` is never called. If it returns a retryable status code (429, 500, 502, or 503), the SDK automatically retries with the fallback model. The fallback model can be from a different provider entirely:
+
+```typescript
+const result = await client.guard({
+  input: "user message to analyze",
+  model: "google/gemini-2.5-flash-lite",
+  fallbackModel: "openai/gpt-4o-mini"
+});
+```
+
+The `fallbackModel` option is available on `guard()`, `redact()`, and `scan()`. The fallback model gets a single attempt — there is no recursive fallback chain.
+
 ---
 
 ## Guard
@@ -95,6 +119,7 @@ if (result.classification === "block") {
 |--------|------|----------|---------|-------------|
 | `input` | `string \| Blob \| URL` | Yes | - | The input to analyze |
 | `model` | `string` | No | `superagent/guard-1.7b` | Model in `provider/model` format |
+| `fallbackModel` | `string` | No | - | Backup model used when primary returns 429/500/502/503 |
 | `systemPrompt` | `string` | No | - | Custom system prompt |
 | `chunkSize` | `number` | No | `8000` | Characters per chunk (0 to disable) |
@@ -155,6 +180,7 @@ console.log(result.redacted);
 |--------|------|----------|---------|-------------|
 | `input` | `string` | Yes | - | The text to redact |
 | `model` | `string` | Yes | - | Model in `provider/model` format |
+| `fallbackModel` | `string` | No | - | Backup model used when primary returns 429/500/502/503 |
 | `entities` | `string[]` | No | Default PII | Entity types to redact |
 | `rewrite` | `boolean` | No | `false` | Rewrite contextually instead of placeholders |
@@ -225,6 +251,7 @@ console.log(`Cost: $${response.usage.cost.toFixed(4)}`);
 | `repo` | `string` | Yes | - | Git repository URL (https:// or git@) |
 | `branch` | `string` | No | Default branch | Branch, tag, or commit to checkout |
 | `model` | `string` | No | `anthropic/claude-sonnet-4-5` | Model for OpenCode analysis |
+| `fallbackModel` | `string` | No | - | Backup model used when primary returns 429/500/502/503 |
 
 ### Response

sdk/python/pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [project]
 name = "safety-agent"
-version = "0.1.5"
+version = "0.1.7-rc1"
 description = "A lightweight Python guardrail SDK for content safety"
 readme = "README.md"
 license = "MIT"

sdk/python/src/safety_agent/client.py

Lines changed: 15 additions & 7 deletions

@@ -241,6 +241,7 @@ async def _guard_single_text(
         input_text: str,
         system_prompt: str | None,
         model: str,
+        fallback_model: str | None = None,
     ) -> GuardResponse:
         """Guard a single chunk of text input (internal method)."""
         is_superagent = model.startswith("superagent/")
@@ -265,7 +266,7 @@ async def _guard_single_text(
             GUARD_RESPONSE_FORMAT if _supports_structured_output(model) else None
         )
         response = await call_provider(
-            model, messages, response_format, self._fallback_options
+            model, messages, response_format, self._fallback_options, fallback_model
         )
         content = response.choices[0].message.content
@@ -292,6 +293,7 @@ async def _guard_image(
         processed: ProcessedInput,
         system_prompt: str | None,
         model: str,
+        fallback_model: str | None = None,
     ) -> GuardResponse:
         """Guard an image input using vision model (internal method)."""
         if not is_vision_model(model):
@@ -323,7 +325,7 @@ async def _guard_image(
             GUARD_RESPONSE_FORMAT if _supports_structured_output(model) else None
         )
         response = await call_provider(
-            model, messages, response_format, self._fallback_options
+            model, messages, response_format, self._fallback_options, fallback_model
         )
         content = response.choices[0].message.content
@@ -350,6 +352,7 @@ async def guard(
         input: GuardInput | None = None,
         *,
         model: str | None = None,
+        fallback_model: str | None = None,
         system_prompt: str | None = None,
         chunk_size: int = 8000,
         # Also accept GuardOptions-style kwargs
@@ -369,6 +372,7 @@ async def guard(
         Args:
             input: The input to analyze - text, URL, or bytes
             model: Model in "provider/model" format. Defaults to superagent/guard-1.7b
+            fallback_model: Fallback model when the primary returns a retryable error (429/500/502/503)
            system_prompt: Optional custom system prompt
             chunk_size: Characters per chunk. Default: 8000. Set to 0 to disable chunking.
@@ -380,6 +384,7 @@ async def guard(
             options = input
             input = options.input
             model = model or options.model
+            fallback_model = fallback_model or options.fallback_model
             system_prompt = system_prompt or options.system_prompt
             chunk_size = options.chunk_size
@@ -401,7 +406,7 @@ async def guard(
 
         # Handle image inputs with vision models
         if processed.type == "image":
-            result = await self._guard_image(processed, system_prompt, model)
+            result = await self._guard_image(processed, system_prompt, model, fallback_model)
             self._post_usage(result.usage)
             return result
@@ -424,7 +429,7 @@ async def guard(
         # Analyze each page in parallel
         results = await asyncio.gather(
             *[
-                self._guard_single_text(page_text, system_prompt, model)
+                self._guard_single_text(page_text, system_prompt, model, fallback_model)
                 for page_text in non_empty_pages
             ]
         )
@@ -439,15 +444,15 @@ async def guard(
 
         # Skip chunking if disabled (chunk_size=0) or input is small enough
         if chunk_size == 0 or len(text) <= chunk_size:
-            result = await self._guard_single_text(text, system_prompt, model)
+            result = await self._guard_single_text(text, system_prompt, model, fallback_model)
             self._post_usage(result.usage)
             return result
 
         # Chunk and process in parallel
         chunks = _chunk_text(text, chunk_size)
         results = await asyncio.gather(
             *[
-                self._guard_single_text(chunk, system_prompt, model)
+                self._guard_single_text(chunk, system_prompt, model, fallback_model)
                 for chunk in chunks
             ]
         )
@@ -462,6 +467,7 @@ async def redact(
         input: str | None = None,
         *,
         model: str | None = None,
+        fallback_model: str | None = None,
         entities: list[str] | None = None,
         rewrite: bool = False,
         # Also accept RedactOptions-style kwargs
@@ -473,6 +479,7 @@ async def redact(
         Args:
             input: The input text to redact
             model: Model in "provider/model" format, e.g. "openai/gpt-4o"
+            fallback_model: Fallback model when the primary returns a retryable error (429/500/502/503)
             entities: Optional list of entity types to redact (overrides default entities)
             rewrite: When true, rewrites text contextually instead of using placeholders
@@ -484,6 +491,7 @@ async def redact(
             options = input
             input = options.input
             model = model or options.model
+            fallback_model = fallback_model or options.fallback_model
             entities = entities or options.entities
             rewrite = options.rewrite
@@ -509,7 +517,7 @@ async def redact(
             REDACT_RESPONSE_FORMAT if _supports_structured_output(model) else None
         )
         response = await call_provider(
-            model, messages, response_format, self._fallback_options
+            model, messages, response_format, self._fallback_options, fallback_model
         )
         content = response.choices[0].message.content

sdk/python/src/safety_agent/providers/__init__.py

Lines changed: 39 additions & 1 deletion

@@ -99,13 +99,21 @@ def get_provider(provider_name: str) -> Any:
     return provider
 
 
+RETRYABLE_STATUS_CODES = {429, 500, 502, 503}
+
+
 async def call_provider(
     model_string: str,
     messages: list[ChatMessage],
     response_format: ResponseFormat | None = None,
     fallback_options: FallbackOptions | None = None,
+    fallback_model: str | None = None,
 ) -> AnalysisResponse:
-    """Call an LLM provider with the given messages."""
+    """Call an LLM provider with the given messages.
+
+    If ``fallback_model`` is set and the primary model returns a retryable
+    status code, the request is re-issued against the fallback model.
+    """
     parsed = parse_model(model_string)
     provider = get_provider(parsed.provider)
@@ -159,6 +167,21 @@ async def call_provider(
     )
 
     if response.status_code != 200:
+        if (
+            fallback_model
+            and response.status_code in RETRYABLE_STATUS_CODES
+        ):
+            print(
+                f"Primary model {model_string} failed "
+                f"({response.status_code}), falling back to "
+                f"{fallback_model}"
+            )
+            return await call_provider(
+                fallback_model,
+                messages,
+                response_format,
+                fallback_options,
+            )
         raise RuntimeError(
             f"Provider API error ({response.status_code}): {response.text}"
         )
@@ -200,6 +223,21 @@ async def call_provider(
     )
 
     if response.status_code != 200:
+        if (
+            fallback_model
+            and response.status_code in RETRYABLE_STATUS_CODES
+        ):
+            print(
+                f"Primary model {model_string} failed "
+                f"({response.status_code}), falling back to "
+                f"{fallback_model}"
+            )
+            return await call_provider(
+                fallback_model,
+                messages,
+                response_format,
+                fallback_options,
+            )
         raise RuntimeError(
             f"Provider API error ({response.status_code}): {response.text}"
         )

sdk/python/src/safety_agent/types.py

Lines changed: 9 additions & 0 deletions

@@ -126,6 +126,9 @@ class GuardOptions:
     model: SupportedModel | None = None
     """Model in 'provider/model' format. Defaults to superagent/guard-1.7b."""
 
+    fallback_model: SupportedModel | None = None
+    """Fallback model to use when the primary model returns a retryable error (429/500/502/503)."""
+
     system_prompt: str | None = None
     """Optional custom system prompt that replaces the default guard prompt."""
@@ -165,6 +168,9 @@ class RedactOptions:
     model: SupportedModel
     """Model in 'provider/model' format, e.g. 'openai/gpt-4o'."""
 
+    fallback_model: SupportedModel | None = None
+    """Fallback model to use when the primary model returns a retryable error (429/500/502/503)."""
+
     entities: list[str] | None = None
     """Optional list of entity types to redact (overrides default entities)."""
@@ -289,6 +295,9 @@ class ScanOptions:
     model: str = "anthropic/claude-sonnet-4-5"
     """Model for OpenCode to use (provider/model format)."""
 
+    fallback_model: str | None = None
+    """Fallback model to use when the primary model returns a retryable error (429/500/502/503)."""
+
 
 @dataclass
 class ScanUsage:
