Skip to content

Commit 968e543

Browse files
Merge branch 'BerriAI:main' into gemini-embeddings
2 parents efa6142 + 75dbb86 commit 968e543

File tree

11 files changed

+533
-70
lines changed

11 files changed

+533
-70
lines changed

docs/my-website/docs/proxy/guardrails/pii_masking_v2.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import TabItem from '@theme/TabItem';
1313
| Supported Entity Types | All Presidio Entity Types |
1414
| Supported Actions | `MASK`, `BLOCK` |
1515
| Supported Modes | `pre_call`, `during_call`, `post_call`, `logging_only` |
16+
| Language Support | Configurable via the `presidio_language` parameter (for example English, Spanish, or German) |
1617

1718
## Deployment options
1819

@@ -48,6 +49,18 @@ Now select the entity types you want to mask. See the [supported actions here](#
4849
style={{width: '50%', display: 'block', margin: '0'}}
4950
/>
5051

52+
#### 1.3 Set Default Language (Optional)
53+
54+
You can also configure a default language for PII analysis using the `presidio_language` field in the UI. This language is used for every request unless the request overrides it with a per-request language (`guardrail_config.language`).
55+
56+
**Supported language codes include:**
57+
- `en` - English (default)
58+
- `es` - Spanish
59+
- `de` - German
60+
61+
62+
If not specified, English (`en`) will be used as the default language.
63+
5164
</TabItem>
5265

5366

@@ -67,6 +80,7 @@ guardrails:
6780
litellm_params:
6881
guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio"
6982
mode: "pre_call"
83+
presidio_language: "en" # optional: set default language for PII analysis
7084
```
7185
7286
Set the following env vars
@@ -380,6 +394,86 @@ print(response)
380394

381395
</Tabs>
382396

397+
### Set default `language` in config.yaml
398+
399+
You can configure a default language for PII analysis in your YAML configuration using the `presidio_language` parameter. This language will be used for all requests unless overridden by a per-request language setting.
400+
401+
```yaml title="Default Language Configuration" showLineNumbers
402+
model_list:
403+
- model_name: gpt-3.5-turbo
404+
litellm_params:
405+
model: openai/gpt-3.5-turbo
406+
api_key: os.environ/OPENAI_API_KEY
407+
408+
guardrails:
409+
- guardrail_name: "presidio-german"
410+
litellm_params:
411+
guardrail: presidio
412+
mode: "pre_call"
413+
presidio_language: "de" # Default to German for PII analysis
414+
pii_entities_config:
415+
CREDIT_CARD: "MASK"
416+
EMAIL_ADDRESS: "MASK"
417+
PERSON: "MASK"
418+
419+
- guardrail_name: "presidio-spanish"
420+
litellm_params:
421+
guardrail: presidio
422+
mode: "pre_call"
423+
presidio_language: "es" # Default to Spanish for PII analysis
424+
pii_entities_config:
425+
CREDIT_CARD: "MASK"
426+
PHONE_NUMBER: "MASK"
427+
```
428+
429+
#### Supported Language Codes
430+
431+
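Presidio supports multiple languages for PII detection, but each language must also be enabled in your Presidio analyzer deployment (its default configuration ships with English recognizers and models only). Common language codes include: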
432+
433+
- `en` - English (default)
434+
- `es` - Spanish
435+
- `de` - German
436+
437+
For a complete list of supported languages, refer to the [Presidio documentation](https://microsoft.github.io/presidio/analyzer/languages/).
438+
439+
#### Language Precedence
440+
441+
The language setting follows this precedence order:
442+
443+
1. **Per-request language** (via `guardrail_config.language`) - highest priority
444+
2. **Guardrail default language** (via `presidio_language`, set in config.yaml or the UI) - medium priority
445+
3. **Built-in default language** (`en`) - lowest priority, used when neither of the above is set
446+
447+
**Example with mixed languages:**
448+
449+
```yaml title="Mixed Language Configuration" showLineNumbers
450+
guardrails:
451+
- guardrail_name: "presidio-multilingual"
452+
litellm_params:
453+
guardrail: presidio
454+
mode: "pre_call"
455+
presidio_language: "de" # Default to German
456+
pii_entities_config:
457+
CREDIT_CARD: "MASK"
458+
PERSON: "MASK"
459+
```
460+
461+
```shell title="Override with per-request language" showLineNumbers
462+
curl http://localhost:4000/chat/completions \
463+
-H "Content-Type: application/json" \
464+
-H "Authorization: Bearer sk-1234" \
465+
-d '{
466+
"model": "gpt-3.5-turbo",
467+
"messages": [
468+
{"role": "user", "content": "Mi tarjeta de crédito es 4111-1111-1111-1111"}
469+
],
470+
"guardrails": ["presidio-multilingual"],
471+
"guardrail_config": {"language": "es"}
472+
}'
473+
```
474+
475+
In this example, the request will use Spanish (`es`) for PII detection even though the guardrail is configured with German (`de`) as the default language.
476+
383477
### Output parsing
384478

385479

litellm/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,7 @@
588588

589589
open_ai_embedding_models: List = ["text-embedding-ada-002"]
590590
cohere_embedding_models: List = [
591+
"embed-v4.0",
591592
"embed-english-v3.0",
592593
"embed-english-light-v3.0",
593594
"embed-multilingual-v3.0",

litellm/proxy/guardrails/guardrail_hooks/presidio.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def __init__(
6969
presidio_ad_hoc_recognizers: Optional[str] = None,
7070
logging_only: Optional[bool] = None,
7171
pii_entities_config: Optional[Dict[PiiEntityType, PiiAction]] = None,
72+
presidio_language: Optional[str] = None,
7273
**kwargs,
7374
):
7475
if logging_only is True:
@@ -83,6 +84,7 @@ def __init__(
8384
self.pii_entities_config: Dict[PiiEntityType, PiiAction] = (
8485
pii_entities_config or {}
8586
)
87+
self.presidio_language = presidio_language or "en"
8688
if mock_testing is True: # for testing purposes only
8789
return
8890

@@ -161,7 +163,7 @@ def _get_presidio_analyze_request_payload(
161163
"""
162164
analyze_payload: PresidioAnalyzeRequest = PresidioAnalyzeRequest(
163165
text=text,
164-
language="en",
166+
language=self.presidio_language,
165167
)
166168
##################################################################
167169
###### Check if user has configured any params for this guardrail

litellm/proxy/guardrails/guardrail_initializers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def initialize_presidio(litellm_params: LitellmParams, guardrail: Guardrail):
113113
pii_entities_config=litellm_params.pii_entities_config,
114114
presidio_analyzer_api_base=litellm_params.presidio_analyzer_api_base,
115115
presidio_anonymizer_api_base=litellm_params.presidio_anonymizer_api_base,
116+
presidio_language=litellm_params.presidio_language,
116117
)
117118
litellm.logging_callback_manager.add_litellm_callback(_presidio_callback)
118119

@@ -125,6 +126,7 @@ def initialize_presidio(litellm_params: LitellmParams, guardrail: Guardrail):
125126
default_on=litellm_params.default_on,
126127
presidio_analyzer_api_base=litellm_params.presidio_analyzer_api_base,
127128
presidio_anonymizer_api_base=litellm_params.presidio_anonymizer_api_base,
129+
presidio_language=litellm_params.presidio_language,
128130
)
129131
litellm.logging_callback_manager.add_litellm_callback(_success_callback)
130132

litellm/proxy/management_endpoints/scim/scim_v2.py

Lines changed: 20 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
ScimTransformations,
3333
)
3434
from litellm.proxy.management_endpoints.team_endpoints import new_team
35+
from litellm.proxy.utils import handle_exception_on_proxy
3536
from litellm.types.proxy.management_endpoints.scim_v2 import *
3637

3738
scim_router = APIRouter(
@@ -81,13 +82,13 @@ async def get_users(
8182
where_conditions["user_email"] = email
8283

8384
# Get users from database
84-
users: List[LiteLLM_UserTable] = (
85-
await prisma_client.db.litellm_usertable.find_many(
86-
where=where_conditions,
87-
skip=(startIndex - 1),
88-
take=count,
89-
order={"created_at": "desc"},
90-
)
85+
users: List[
86+
LiteLLM_UserTable
87+
] = await prisma_client.db.litellm_usertable.find_many(
88+
where=where_conditions,
89+
skip=(startIndex - 1),
90+
take=count,
91+
order={"created_at": "desc"},
9192
)
9293

9394
# Get total count for pagination
@@ -111,9 +112,7 @@ async def get_users(
111112
)
112113

113114
except Exception as e:
114-
raise HTTPException(
115-
status_code=500, detail={"error": f"Error retrieving users: {str(e)}"}
116-
)
115+
raise handle_exception_on_proxy(e)
117116

118117

119118
@scim_router.get(
@@ -147,12 +146,8 @@ async def get_user(
147146
scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(user)
148147
return scim_user
149148

150-
except HTTPException:
151-
raise
152149
except Exception as e:
153-
raise HTTPException(
154-
status_code=500, detail={"error": f"Error retrieving user: {str(e)}"}
155-
)
150+
raise handle_exception_on_proxy(e)
156151

157152

158153
@scim_router.post(
@@ -213,13 +208,8 @@ async def create_user(
213208
user=created_user
214209
)
215210
return scim_user
216-
217-
except HTTPException:
218-
raise
219211
except Exception as e:
220-
raise HTTPException(
221-
status_code=500, detail={"error": f"Error creating user: {str(e)}"}
222-
)
212+
raise handle_exception_on_proxy(e)
223213

224214

225215
@scim_router.put(
@@ -241,12 +231,8 @@ async def update_user(
241231
raise HTTPException(status_code=500, detail={"error": "No database connected"})
242232
try:
243233
return None
244-
except HTTPException:
245-
raise
246234
except Exception as e:
247-
raise HTTPException(
248-
status_code=500, detail={"error": f"Error updating user: {str(e)}"}
249-
)
235+
raise handle_exception_on_proxy(e)
250236

251237

252238
@scim_router.delete(
@@ -299,13 +285,8 @@ async def delete_user(
299285
await prisma_client.db.litellm_usertable.delete(where={"user_id": user_id})
300286

301287
return Response(status_code=204)
302-
303-
except HTTPException:
304-
raise
305288
except Exception as e:
306-
raise HTTPException(
307-
status_code=500, detail={"error": f"Error deleting user: {str(e)}"}
308-
)
289+
raise handle_exception_on_proxy(e)
309290

310291

311292
@scim_router.patch(
@@ -341,12 +322,8 @@ async def patch_user(
341322

342323
return None
343324

344-
except HTTPException:
345-
raise
346325
except Exception as e:
347-
raise HTTPException(
348-
status_code=500, detail={"error": f"Error patching user: {str(e)}"}
349-
)
326+
raise handle_exception_on_proxy(e)
350327

351328

352329
# Group Endpoints
@@ -431,9 +408,7 @@ async def get_groups(
431408
)
432409

433410
except Exception as e:
434-
raise HTTPException(
435-
status_code=500, detail={"error": f"Error retrieving groups: {str(e)}"}
436-
)
411+
raise handle_exception_on_proxy(e)
437412

438413

439414
@scim_router.get(
@@ -469,12 +444,8 @@ async def get_group(
469444
)
470445
return scim_group
471446

472-
except HTTPException:
473-
raise
474447
except Exception as e:
475-
raise HTTPException(
476-
status_code=500, detail={"error": f"Error retrieving group: {str(e)}"}
477-
)
448+
raise handle_exception_on_proxy(e)
478449

479450

480451
@scim_router.post(
@@ -535,12 +506,8 @@ async def create_group(
535506
created_team
536507
)
537508
return scim_group
538-
except HTTPException:
539-
raise
540509
except Exception as e:
541-
raise HTTPException(
542-
status_code=500, detail={"error": f"Error creating group: {str(e)}"}
543-
)
510+
raise handle_exception_on_proxy(e)
544511

545512

546513
@scim_router.put(
@@ -655,12 +622,8 @@ async def update_group(
655622
},
656623
)
657624

658-
except HTTPException:
659-
raise
660625
except Exception as e:
661-
raise HTTPException(
662-
status_code=500, detail={"error": f"Error updating group: {str(e)}"}
663-
)
626+
raise handle_exception_on_proxy(e)
664627

665628

666629
@scim_router.delete(
@@ -709,12 +672,8 @@ async def delete_group(
709672

710673
return Response(status_code=204)
711674

712-
except HTTPException:
713-
raise
714675
except Exception as e:
715-
raise HTTPException(
716-
status_code=500, detail={"error": f"Error deleting group: {str(e)}"}
717-
)
676+
raise handle_exception_on_proxy(e)
718677

719678

720679
@scim_router.patch(
@@ -749,9 +708,5 @@ async def patch_group(
749708
detail={"error": f"Group not found with ID: {group_id}"},
750709
)
751710
return None
752-
except HTTPException:
753-
raise
754711
except Exception as e:
755-
raise HTTPException(
756-
status_code=500, detail={"error": f"Error patching group: {str(e)}"}
757-
)
712+
raise handle_exception_on_proxy(e)

litellm/types/guardrails.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,10 @@ class PresidioPresidioConfigModelUserInterface(BaseModel):
234234
# extra param to let the ui know this is a boolean
235235
json_schema_extra={"ui_type": GuardrailParamUITypes.BOOL},
236236
)
237+
presidio_language: Optional[str] = Field(
238+
default="en",
239+
description="Language code for Presidio PII analysis (e.g., 'en', 'de', 'es', 'fr')",
240+
)
237241

238242

239243
class PresidioConfigModel(PresidioPresidioConfigModelUserInterface):

0 commit comments

Comments
 (0)