Skip to content

Commit d677a6b

Browse files
[Bug Fix] Bedrock Guardrail - Don't raise exception on intervene action (#11875)
* Refactor Bedrock guardrails to handle BLOCKED vs ANONYMIZED actions
* Enhance Bedrock guardrail masking with improved anonymization logic
* fix _should_raise_guardrail_blocked_exception
* test_bedrock_guardrail_uses_masked_output_without_masking_flags
* fix linting error

---------

Co-authored-by: Cursor Agent <[email protected]>
1 parent d4ba490 commit d677a6b

File tree

2 files changed

+341
-41
lines changed

2 files changed

+341
-41
lines changed

litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py

Lines changed: 98 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ def convert_to_bedrock_format(
7979

8080
if messages:
8181
for message in messages:
82-
message_text_content: Optional[
83-
List[str]
84-
] = self.get_content_for_message(message=message)
82+
message_text_content: Optional[List[str]] = (
83+
self.get_content_for_message(message=message)
84+
)
8585
if message_text_content is None:
8686
continue
8787
for text_content in message_text_content:
@@ -241,7 +241,7 @@ def _should_raise_guardrail_blocked_exception(
241241
self, response: BedrockGuardrailResponse
242242
) -> bool:
243243
"""
244-
By default always raise an exception when a guardrail intervention is detected.
244+
Only raise exception for "BLOCKED" actions, not for "ANONYMIZED" actions.
245245
246246
If `self.mask_request_content` or `self.mask_response_content` is set to `True`, then use the output from the guardrail to mask the request or response content.
247247
"""
@@ -250,11 +250,68 @@ def _should_raise_guardrail_blocked_exception(
250250
if self.mask_request_content or self.mask_response_content:
251251
return False
252252

253-
# if intervention, return True
254-
if response.get("action") == "GUARDRAIL_INTERVENED":
255-
return True
256-
257253
# if no intervention, return False
254+
if response.get("action") != "GUARDRAIL_INTERVENED":
255+
return False
256+
257+
# Check assessments to determine if any actions were BLOCKED (vs ANONYMIZED)
258+
assessments = response.get("assessments", [])
259+
if not assessments:
260+
return False
261+
262+
for assessment in assessments:
263+
# Check topic policy
264+
topic_policy = assessment.get("topicPolicy")
265+
if topic_policy:
266+
topics = topic_policy.get("topics", [])
267+
for topic in topics:
268+
if topic.get("action") == "BLOCKED":
269+
return True
270+
271+
# Check content policy
272+
content_policy = assessment.get("contentPolicy")
273+
if content_policy:
274+
filters = content_policy.get("filters", [])
275+
for filter_item in filters:
276+
if filter_item.get("action") == "BLOCKED":
277+
return True
278+
279+
# Check word policy
280+
word_policy = assessment.get("wordPolicy")
281+
if word_policy:
282+
custom_words = word_policy.get("customWords", [])
283+
for custom_word in custom_words:
284+
if custom_word.get("action") == "BLOCKED":
285+
return True
286+
managed_words = word_policy.get("managedWordLists", [])
287+
for managed_word in managed_words:
288+
if managed_word.get("action") == "BLOCKED":
289+
return True
290+
291+
# Check sensitive information policy
292+
sensitive_info_policy = assessment.get("sensitiveInformationPolicy")
293+
if sensitive_info_policy:
294+
pii_entities = sensitive_info_policy.get("piiEntities", [])
295+
if pii_entities:
296+
for pii_entity in pii_entities:
297+
if pii_entity.get("action") == "BLOCKED":
298+
return True
299+
regexes = sensitive_info_policy.get("regexes", [])
300+
if regexes:
301+
for regex in regexes:
302+
if regex.get("action") == "BLOCKED":
303+
return True
304+
305+
# Check contextual grounding policy
306+
contextual_grounding_policy = assessment.get("contextualGroundingPolicy")
307+
if contextual_grounding_policy:
308+
grounding_filters = contextual_grounding_policy.get("filters", [])
309+
for filter_item in grounding_filters:
310+
if filter_item.get("action") == "BLOCKED":
311+
return True
312+
313+
# If we got here, intervention occurred but no BLOCKED actions found
314+
# This means all actions were ANONYMIZED or NONE, so don't raise exception
258315
return False
259316

260317
@log_guardrail_information
@@ -300,11 +357,11 @@ async def async_pre_call_hook(
300357
#########################################################
301358
########## 2. Update the messages with the guardrail response ##########
302359
#########################################################
303-
data[
304-
"messages"
305-
] = self._update_messages_with_updated_bedrock_guardrail_response(
306-
messages=new_messages,
307-
bedrock_guardrail_response=bedrock_guardrail_response,
360+
data["messages"] = (
361+
self._update_messages_with_updated_bedrock_guardrail_response(
362+
messages=new_messages,
363+
bedrock_guardrail_response=bedrock_guardrail_response,
364+
)
308365
)
309366

310367
#########################################################
@@ -354,11 +411,11 @@ async def async_moderation_hook(
354411
#########################################################
355412
########## 2. Update the messages with the guardrail response ##########
356413
#########################################################
357-
data[
358-
"messages"
359-
] = self._update_messages_with_updated_bedrock_guardrail_response(
360-
messages=new_messages,
361-
bedrock_guardrail_response=bedrock_guardrail_response,
414+
data["messages"] = (
415+
self._update_messages_with_updated_bedrock_guardrail_response(
416+
messages=new_messages,
417+
bedrock_guardrail_response=bedrock_guardrail_response,
418+
)
362419
)
363420

364421
#########################################################
@@ -408,11 +465,11 @@ async def async_post_call_success_hook(
408465
#########################################################
409466
########## 2. Update the messages with the guardrail response ##########
410467
#########################################################
411-
data[
412-
"messages"
413-
] = self._update_messages_with_updated_bedrock_guardrail_response(
414-
messages=new_messages,
415-
bedrock_guardrail_response=bedrock_guardrail_response,
468+
data["messages"] = (
469+
self._update_messages_with_updated_bedrock_guardrail_response(
470+
messages=new_messages,
471+
bedrock_guardrail_response=bedrock_guardrail_response,
472+
)
416473
)
417474

418475
#########################################################
@@ -440,21 +497,29 @@ def _update_messages_with_updated_bedrock_guardrail_response(
440497
Returns:
441498
List of messages with content masked according to guardrail response
442499
"""
443-
# Skip processing if masking is not enabled
444-
if not (self.mask_request_content or self.mask_response_content):
445-
return messages
446-
447500
# Get masked texts from guardrail response
448501
masked_texts = self._extract_masked_texts_from_response(
449502
bedrock_guardrail_response
450503
)
451-
if not masked_texts:
452-
return messages
453504

454-
# Apply masking to messages using index tracking
455-
return self._apply_masking_to_messages(
456-
messages=messages, masked_texts=masked_texts
457-
)
505+
# If guardrail provided masked output, use it regardless of masking flags
506+
# because the guardrail has already determined this content needs anonymization
507+
if masked_texts:
508+
verbose_proxy_logger.debug(
509+
"Bedrock guardrail provided masked output, applying to messages"
510+
)
511+
return self._apply_masking_to_messages(
512+
messages=messages, masked_texts=masked_texts
513+
)
514+
515+
# If masking is enabled but the guardrail returned no masked texts, there is
516+
# nothing to apply: log it and fall through to return the original messages unchanged
517+
if self.mask_request_content or self.mask_response_content:
518+
verbose_proxy_logger.debug(
519+
"Masking enabled but no masked output from guardrail, returning original messages"
520+
)
521+
522+
return messages
458523

459524
async def async_post_call_streaming_iterator_hook(
460525
self,

0 commit comments

Comments
 (0)