Skip to content

Commit 4e366c4

Browse files
feat(py, claude agent sdk): Add wrapping for PostToolUseFailure hook (#2460)
1 parent 4e25507 commit 4e366c4

File tree

4 files changed

+280
-25
lines changed

4 files changed

+280
-25
lines changed

python/langsmith/integrations/claude_agent_sdk/_client.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@
88

99
from langsmith.run_helpers import get_current_run_tree, trace
1010

11-
from ._hooks import clear_active_tool_runs, post_tool_use_hook, pre_tool_use_hook
11+
from ._hooks import (
12+
clear_active_tool_runs,
13+
post_tool_use_failure_hook,
14+
post_tool_use_hook,
15+
pre_tool_use_hook,
16+
)
1217
from ._messages import (
1318
build_llm_input,
1419
extract_usage_from_result_message,
@@ -100,7 +105,6 @@ def begin_llm_run_from_assistant_messages(
100105
name=LLM_RUN_NAME,
101106
run_type="llm",
102107
inputs={"messages": inputs} if inputs else {},
103-
outputs=outputs[-1] if len(outputs) == 1 else {"content": outputs},
104108
extra={"metadata": {"ls_model_name": model}} if model else {},
105109
start_time=datetime.fromtimestamp(start_time, tz=timezone.utc)
106110
if start_time
@@ -112,6 +116,9 @@ def begin_llm_run_from_assistant_messages(
112116
except Exception as e:
113117
logger.warning(f"Failed to post LLM run: {e}")
114118

119+
# Set outputs after posting so they are sent with end_time on the patch.
120+
llm_run.outputs = outputs[-1] if len(outputs) == 1 else {"content": outputs}
121+
115122
final_content = (
116123
{"content": flatten_content_blocks(last_msg.content), "role": "assistant"}
117124
if hasattr(last_msg, "content")
@@ -121,39 +128,30 @@ def begin_llm_run_from_assistant_messages(
121128

122129

123130
def _inject_tracing_hooks(options: Any) -> None:
124-
"""Inject LangSmith tracing hooks into ClaudeAgentOptions.
125-
126-
This adds PreToolUse and PostToolUse hooks to capture ALL tool calls
127-
(built-in, external MCP, and SDK MCP). The hooks work across all LLM
128-
providers (Anthropic, Vertex AI, Kimi, etc.) because they use explicit
129-
tool_use_id correlation instead of relying on async context propagation.
130-
131-
Args:
132-
options: ClaudeAgentOptions instance to modify
133-
"""
131+
"""Inject LangSmith tracing hooks into ClaudeAgentOptions."""
134132
if not hasattr(options, "hooks"):
135133
return
136134

137135
# Initialize hooks dict if not present
138136
if options.hooks is None:
139137
options.hooks = {}
140138

141-
# Add PreToolUse hook if not already set
142-
if "PreToolUse" not in options.hooks:
143-
options.hooks["PreToolUse"] = []
144-
145-
# Add PostToolUse hook if not already set
146-
if "PostToolUse" not in options.hooks:
147-
options.hooks["PostToolUse"] = []
139+
for event in ("PreToolUse", "PostToolUse", "PostToolUseFailure"):
140+
if event not in options.hooks:
141+
options.hooks[event] = []
148142

149143
try:
150144
from claude_agent_sdk import HookMatcher # type: ignore[import-not-found]
151145

152146
langsmith_pre_matcher = HookMatcher(matcher=None, hooks=[pre_tool_use_hook])
153147
langsmith_post_matcher = HookMatcher(matcher=None, hooks=[post_tool_use_hook])
148+
langsmith_failure_matcher = HookMatcher(
149+
matcher=None, hooks=[post_tool_use_failure_hook]
150+
)
154151

155152
options.hooks["PreToolUse"].insert(0, langsmith_pre_matcher)
156153
options.hooks["PostToolUse"].insert(0, langsmith_post_matcher)
154+
options.hooks["PostToolUseFailure"].insert(0, langsmith_failure_matcher)
157155

158156
logger.debug("Injected LangSmith tracing hooks into ClaudeAgentOptions")
159157
except ImportError:

python/langsmith/integrations/claude_agent_sdk/_hooks.py

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
"""Hook-based tool tracing for Claude Agent SDK.
2-
3-
This module provides hook handlers that traces tool calls by intercepting
4-
`PreToolUse` and `PostToolUse` events.
5-
"""
1+
"""Hook-based tool tracing for Claude Agent SDK."""
62

73
import logging
84
import time
@@ -23,7 +19,6 @@
2319

2420
logger = logging.getLogger(__name__)
2521

26-
# Storage for correlating PreToolUse and PostToolUse events
2722
# Key: tool_use_id, Value: (run_tree, start_time)
2823
_active_tool_runs: dict[str, tuple[Any, float]] = {}
2924

@@ -177,6 +172,83 @@ async def post_tool_use_hook(
177172
return {}
178173

179174

175+
async def post_tool_use_failure_hook(
    input_data: "HookInput",
    tool_use_id: Optional[str],
    context: "HookContext",
) -> "HookJSONOutput":
    """Close the traced run of a tool invocation that failed.

    This hook fires for built-in tool failures (Bash, Read, Write, etc.)
    and is mutually exclusive with :func:`post_tool_use_hook` — when a
    built-in tool fails, only ``PostToolUseFailure`` fires.

    The run is looked up by ``tool_use_id``, first among the client-managed
    runs and then among the runs registered in ``_active_tool_runs``. The
    matching run is removed from its registry, ended with the error message
    as both its ``error`` and its ``outputs["error"]``, and patched.

    Args:
        input_data: Hook payload. ``tool_name`` and ``error`` are read here;
            ``tool_input`` and ``is_interrupt`` may be present but are not
            used by this hook.
        tool_use_id: Unique identifier for this tool invocation. When it is
            missing or empty, nothing is traced.
        context: Hook context; not used by this hook.

    Returns:
        Always an empty dict. Failures while updating the trace are logged
        and never propagated to the caller.
    """
    if not tool_use_id:
        logger.debug(
            "PostToolUseFailure hook called without tool_use_id, skipping trace"
        )
        return {}

    tool_name: str = str(input_data.get("tool_name", "unknown_tool"))

    # Fall back when the error is missing, None, or empty so the run is
    # always closed with a non-empty error message (a bare ``.get`` default
    # would turn an explicit ``None`` into the string "None").
    raw_error = input_data.get("error")
    error: str = (str(raw_error) if raw_error is not None else "") or "Unknown error"

    # Check if this is a client-managed run (subagent or its tools)
    run_tree = _client_managed_runs.pop(tool_use_id, None)
    if run_tree is not None:
        try:
            run_tree.end(
                outputs={"error": error},
                error=error,
            )
            run_tree.patch()
        except Exception as e:
            logger.warning(f"Failed to update client-managed run on failure: {e}")
        return {}

    try:
        run_info = _active_tool_runs.pop(tool_use_id, None)
        if run_info is None:
            logger.debug(
                f"No matching PreToolUse found for failed {tool_name} "
                f"(id={tool_use_id})"
            )
            return {}

        tool_run, start_time = run_info

        tool_run.end(
            outputs={"error": error},
            error=error,
        )

        # A failed patch is logged on its own so the duration log below
        # is still emitted.
        try:
            tool_run.patch()
        except Exception as e:
            logger.warning(f"Failed to patch failed tool run for {tool_name}: {e}")

        duration_ms = (time.time() - start_time) * 1000
        logger.debug(
            f"Completed failed tool trace for {tool_name} "
            f"(id={tool_use_id}, duration={duration_ms:.2f}ms, error={error!r})"
        )

    except Exception as e:
        logger.warning(
            f"Error in PostToolUseFailure hook for {tool_name}: {e}", exc_info=True
        )

    return {}
250+
251+
180252
def clear_active_tool_runs() -> None:
181253
"""Clear all active tool runs.
182254
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""Integration tests for Claude Agent SDK tracing."""
2+
3+
import pytest
4+
5+
try:
6+
import claude_agent_sdk
7+
8+
CLAUDE_SDK_AVAILABLE = True
9+
except ImportError:
10+
CLAUDE_SDK_AVAILABLE = False
11+
12+
from langsmith.integrations.claude_agent_sdk._hooks import _active_tool_runs
13+
14+
pytestmark = pytest.mark.skipif(
15+
not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed"
16+
)
17+
18+
19+
@pytest.mark.asyncio
async def test_tool_failure_creates_error_trace():
    """A failing Bash command produces an errored tool run via PostToolUseFailure."""
    from langsmith.integrations.claude_agent_sdk import configure_claude_agent_sdk

    configure_claude_agent_sdk(name="test.tool_failure")

    sdk_options = claude_agent_sdk.ClaudeAgentOptions(
        permission_mode="bypassPermissions",
        allowed_tools=["Bash"],
        max_turns=2,
    )
    prompt = "Run this exact bash command: cat /tmp/__langsmith_test_nonexistent.txt"

    # Collect every tool result block carried by the user messages of the reply.
    tool_result_blocks = []
    async with claude_agent_sdk.ClaudeSDKClient(options=sdk_options) as client:
        await client.query(prompt)
        async for msg in client.receive_response():
            if type(msg).__name__ != "UserMessage":
                continue
            if not hasattr(msg, "content"):
                continue
            tool_result_blocks.extend(
                block
                for block in msg.content
                if type(block).__name__ == "ToolResultBlock"
            )

    assert len(tool_result_blocks) >= 1
    first_result = tool_result_blocks[0]
    assert first_result.is_error is True

    # PostToolUseFailure hook should have consumed the run — no orphans
    assert len(_active_tool_runs) == 0
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""Unit tests for Claude Agent SDK hooks."""
2+
3+
import asyncio
4+
import sys
5+
from unittest.mock import MagicMock
6+
7+
import pytest
8+
9+
from langsmith.integrations.claude_agent_sdk._hooks import (
10+
_active_tool_runs,
11+
_client_managed_runs,
12+
post_tool_use_failure_hook,
13+
post_tool_use_hook,
14+
pre_tool_use_hook,
15+
)
16+
from langsmith.run_trees import RunTree
17+
18+
ERROR_MSG = "Exit code 1\ncat: /nonexistent: No such file or directory"
19+
20+
21+
@pytest.fixture(autouse=True)
def _clear_state():
    """Empty both module-level run registries before and after every test."""
    registries = (_active_tool_runs, _client_managed_runs)

    for registry in registries:
        registry.clear()
    yield
    for registry in registries:
        registry.clear()
29+
30+
31+
def _make_parent_run() -> RunTree:
    """Build a ``chain`` RunTree named ``test-parent`` backed by a mock client.

    The returned run is used by the tests as the parent of the tool runs
    created by the hooks.
    """
    stub_client = MagicMock()
    return RunTree(name="test-parent", run_type="chain", client=stub_client)
34+
35+
36+
class TestToolUseSuccessFlow:
    """PreToolUse creates a child run; PostToolUse ends it with output."""

    @pytest.fixture(autouse=True)
    def _set_parent(self):
        """Install a parent run for the test and remove it afterwards."""
        from langsmith.integrations.claude_agent_sdk import _tools

        _tools.set_parent_run_tree(_make_parent_run())
        yield
        _tools.clear_parent_run_tree()

    def test_success_flow(self):
        """A pre/post hook pair registers the run, then closes it cleanly."""
        tool_use_id = "tu_1"
        pre_payload = {"tool_name": "Bash", "tool_input": {"command": "echo hi"}}
        post_payload = {
            "tool_name": "Bash",
            "tool_response": {"output": "hi", "is_error": False},
        }

        asyncio.run(pre_tool_use_hook(pre_payload, tool_use_id, MagicMock()))

        # The pre hook must have registered a tool run under the call id.
        assert "tu_1" in _active_tool_runs
        tool_run, _ = _active_tool_runs["tu_1"]
        assert tool_run.name == "Bash"
        assert tool_run.run_type == "tool"
        assert tool_run.inputs == {"input": {"command": "echo hi"}}

        asyncio.run(post_tool_use_hook(post_payload, tool_use_id, MagicMock()))

        # The post hook consumes the registry entry and records the response.
        assert "tu_1" not in _active_tool_runs
        assert tool_run.outputs == {"output": "hi", "is_error": False}
        assert tool_run.error is None
76+
77+
78+
class TestToolUseFailureFlow:
    """PreToolUse creates a child run; PostToolUseFailure marks it as errored."""

    @pytest.fixture(autouse=True)
    def _set_parent(self):
        """Install a parent run for the test and remove it afterwards."""
        from langsmith.integrations.claude_agent_sdk import _tools

        _tools.set_parent_run_tree(_make_parent_run())
        yield
        _tools.clear_parent_run_tree()

    def test_failure_flow(self):
        """A pre hook followed by the failure hook leaves an errored run."""
        tool_use_id = "tu_2"
        pre_payload = {
            "tool_name": "Bash",
            "tool_input": {"command": "cat /nonexistent"},
        }
        failure_payload = {
            "tool_name": "Bash",
            "tool_input": {"command": "cat /nonexistent"},
            "error": ERROR_MSG,
        }

        asyncio.run(pre_tool_use_hook(pre_payload, tool_use_id, MagicMock()))

        # Keep a handle on the run before the failure hook removes it.
        tool_run, _ = _active_tool_runs["tu_2"]

        asyncio.run(
            post_tool_use_failure_hook(failure_payload, tool_use_id, MagicMock())
        )

        assert "tu_2" not in _active_tool_runs
        assert tool_run.error == ERROR_MSG
        assert tool_run.outputs == {"error": ERROR_MSG}
118+
119+
120+
class TestInjectTracingHooks:
    """``_inject_tracing_hooks`` registers one matcher per traced hook event."""

    def test_injects_all_three_hooks(self):
        """Starting from ``hooks=None``, all three events get exactly one matcher."""
        from langsmith.integrations.claude_agent_sdk._client import (
            _inject_tracing_hooks,
        )

        options = MagicMock()
        options.hooks = None

        # Stand in for the SDK so the function's ``HookMatcher`` import works
        # whether or not the real package is installed. Remember any module
        # that was already registered and put it back afterwards: deleting
        # the entry unconditionally would evict a real, already-imported SDK
        # for the rest of the test session.
        not_loaded = object()
        previous_module = sys.modules.get("claude_agent_sdk", not_loaded)
        sys.modules["claude_agent_sdk"] = MagicMock()
        try:
            _inject_tracing_hooks(options)
        finally:
            if previous_module is not_loaded:
                sys.modules.pop("claude_agent_sdk", None)
            else:
                sys.modules["claude_agent_sdk"] = previous_module

        for event in ("PreToolUse", "PostToolUse", "PostToolUseFailure"):
            assert event in options.hooks
            assert len(options.hooks[event]) == 1

0 commit comments

Comments
 (0)