feat(py, claude agent sdk): Add wrapping for PostToolUseFailure hook (#2460)

angus-langchain · web-flow · commit 4e366c4685da · 2026-02-20T15:59:27.000-08:00
diff --git a/python/langsmith/integrations/claude_agent_sdk/_client.py b/python/langsmith/integrations/claude_agent_sdk/_client.py
@@ -8,7 +8,12 @@
 
 from langsmith.run_helpers import get_current_run_tree, trace
 
-from ._hooks import clear_active_tool_runs, post_tool_use_hook, pre_tool_use_hook
+from ._hooks import (
+    clear_active_tool_runs,
+    post_tool_use_failure_hook,
+    post_tool_use_hook,
+    pre_tool_use_hook,
+)
 from ._messages import (
     build_llm_input,
     extract_usage_from_result_message,
@@ -100,7 +105,6 @@ def begin_llm_run_from_assistant_messages(
         name=LLM_RUN_NAME,
         run_type="llm",
         inputs={"messages": inputs} if inputs else {},
-        outputs=outputs[-1] if len(outputs) == 1 else {"content": outputs},
         extra={"metadata": {"ls_model_name": model}} if model else {},
         start_time=datetime.fromtimestamp(start_time, tz=timezone.utc)
         if start_time
@@ -112,6 +116,9 @@ def begin_llm_run_from_assistant_messages(
     except Exception as e:
         logger.warning(f"Failed to post LLM run: {e}")
 
+    # Set outputs after posting so they are sent with end_time on the patch.
+    llm_run.outputs = outputs[-1] if len(outputs) == 1 else {"content": outputs}
+
     final_content = (
         {"content": flatten_content_blocks(last_msg.content), "role": "assistant"}
         if hasattr(last_msg, "content")
@@ -121,39 +128,30 @@ def begin_llm_run_from_assistant_messages(
 
 
 def _inject_tracing_hooks(options: Any) -> None:
-    """Inject LangSmith tracing hooks into ClaudeAgentOptions.
-
-    This adds PreToolUse and PostToolUse hooks to capture ALL tool calls
-    (built-in, external MCP, and SDK MCP). The hooks work across all LLM
-    providers (Anthropic, Vertex AI, Kimi, etc.) because they use explicit
-    tool_use_id correlation instead of relying on async context propagation.
-
-    Args:
-        options: ClaudeAgentOptions instance to modify
-    """
+    """Inject LangSmith tracing hooks into ClaudeAgentOptions."""
     if not hasattr(options, "hooks"):
         return
 
     # Initialize hooks dict if not present
     if options.hooks is None:
         options.hooks = {}
 
-    # Add PreToolUse hook if not already set
-    if "PreToolUse" not in options.hooks:
-        options.hooks["PreToolUse"] = []
-
-    # Add PostToolUse hook if not already set
-    if "PostToolUse" not in options.hooks:
-        options.hooks["PostToolUse"] = []
+    for event in ("PreToolUse", "PostToolUse", "PostToolUseFailure"):
+        if event not in options.hooks:
+            options.hooks[event] = []
 
     try:
         from claude_agent_sdk import HookMatcher  # type: ignore[import-not-found]
 
         langsmith_pre_matcher = HookMatcher(matcher=None, hooks=[pre_tool_use_hook])
         langsmith_post_matcher = HookMatcher(matcher=None, hooks=[post_tool_use_hook])
+        langsmith_failure_matcher = HookMatcher(
+            matcher=None, hooks=[post_tool_use_failure_hook]
+        )
 
         options.hooks["PreToolUse"].insert(0, langsmith_pre_matcher)
         options.hooks["PostToolUse"].insert(0, langsmith_post_matcher)
+        options.hooks["PostToolUseFailure"].insert(0, langsmith_failure_matcher)
 
         logger.debug("Injected LangSmith tracing hooks into ClaudeAgentOptions")
     except ImportError:
diff --git a/python/langsmith/integrations/claude_agent_sdk/_hooks.py b/python/langsmith/integrations/claude_agent_sdk/_hooks.py
@@ -1,8 +1,4 @@
-"""Hook-based tool tracing for Claude Agent SDK.
-
-This module provides hook handlers that traces tool calls by intercepting
-`PreToolUse` and `PostToolUse` events.
-"""
+"""Hook-based tool tracing for Claude Agent SDK."""
 
 import logging
 import time
@@ -23,7 +19,6 @@
 
 logger = logging.getLogger(__name__)
 
-# Storage for correlating PreToolUse and PostToolUse events
 # Key: tool_use_id, Value: (run_tree, start_time)
 _active_tool_runs: dict[str, tuple[Any, float]] = {}
 
@@ -177,6 +172,83 @@ async def post_tool_use_hook(
     return {}
 
 
+async def post_tool_use_failure_hook(
+    input_data: "HookInput",
+    tool_use_id: Optional[str],
+    context: "HookContext",
+) -> "HookJSONOutput":
+    """Trace tool execution when it fails.
+
+    This hook fires for built-in tool failures (Bash, Read, Write, etc.)
+    and is mutually exclusive with :func:`post_tool_use_hook` — when a
+    built-in tool fails, only ``PostToolUseFailure`` fires.
+
+    Args:
+        input_data: Hook payload; ``tool_name`` and ``error`` are read here
+            (a missing, ``None`` or empty ``error`` becomes "Unknown error").
+        tool_use_id: Unique identifier for this tool invocation
+        context: Hook context (currently contains only signal)
+
+    Returns:
+        Hook output (empty dict)
+    """
+    if not tool_use_id:
+        logger.debug(
+            "PostToolUseFailure hook called without tool_use_id, skipping trace"
+        )
+        return {}
+
+    tool_name: str = str(input_data.get("tool_name", "unknown_tool"))
+    error: str = str(input_data.get("error") or "Unknown error")
+
+    # Check if this is a client-managed run (subagent or its tools)
+    run_tree = _client_managed_runs.pop(tool_use_id, None)
+    if run_tree:
+        try:
+            run_tree.end(
+                outputs={"error": error},
+                error=error,
+            )
+            run_tree.patch()
+        except Exception as e:
+            logger.warning(f"Failed to update client-managed run on failure: {e}")
+        return {}
+
+    try:
+        run_info = _active_tool_runs.pop(tool_use_id, None)
+        if not run_info:
+            logger.debug(
+                f"No matching PreToolUse found for failed {tool_name} "
+                f"(id={tool_use_id})"
+            )
+            return {}
+
+        tool_run, start_time = run_info
+
+        tool_run.end(
+            outputs={"error": error},
+            error=error,
+        )
+
+        try:
+            tool_run.patch()
+        except Exception as e:
+            logger.warning(f"Failed to patch failed tool run for {tool_name}: {e}")
+
+        duration_ms = (time.time() - start_time) * 1000
+        logger.debug(
+            f"Completed failed tool trace for {tool_name} "
+            f"(id={tool_use_id}, duration={duration_ms:.2f}ms, error={error!r})"
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Error in PostToolUseFailure hook for {tool_name}: {e}", exc_info=True
+        )
+
+    return {}
+
+
 def clear_active_tool_runs() -> None:
     """Clear all active tool runs.
 
diff --git a/python/tests/integration_tests/wrappers/test_claude_agent_sdk.py b/python/tests/integration_tests/wrappers/test_claude_agent_sdk.py
@@ -0,0 +1,47 @@
+"""Integration tests for Claude Agent SDK tracing."""
+
+import pytest
+
+try:
+    import claude_agent_sdk
+
+    CLAUDE_SDK_AVAILABLE = True
+except ImportError:
+    CLAUDE_SDK_AVAILABLE = False
+
+from langsmith.integrations.claude_agent_sdk._hooks import _active_tool_runs
+
+pytestmark = pytest.mark.skipif(
+    not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed"
+)
+
+
+@pytest.mark.asyncio
+async def test_tool_failure_creates_error_trace():
+    """A failing Bash command produces an errored tool run via PostToolUseFailure."""
+    from langsmith.integrations.claude_agent_sdk import configure_claude_agent_sdk
+
+    configure_claude_agent_sdk(name="test.tool_failure")
+
+    options = claude_agent_sdk.ClaudeAgentOptions(
+        permission_mode="bypassPermissions",
+        allowed_tools=["Bash"],
+        max_turns=2,
+    )
+    # Gather tool results; messages are matched by class name, not isinstance.
+    tool_result_blocks = []
+    async with claude_agent_sdk.ClaudeSDKClient(options=options) as client:
+        await client.query(
+            "Run this exact bash command: cat /tmp/__langsmith_test_nonexistent.txt"
+        )
+        async for msg in client.receive_response():
+            if type(msg).__name__ == "UserMessage" and hasattr(msg, "content"):
+                for block in msg.content:
+                    if type(block).__name__ == "ToolResultBlock":
+                        tool_result_blocks.append(block)
+
+    assert len(tool_result_blocks) >= 1
+    assert tool_result_blocks[0].is_error is True
+    # NOTE(review): the registry is also empty if PreToolUse never fired, so
+    # this shows no run was orphaned, not that an errored run was recorded.
+    assert len(_active_tool_runs) == 0
diff --git a/python/tests/unit_tests/wrappers/test_claude_agent_sdk_hooks.py b/python/tests/unit_tests/wrappers/test_claude_agent_sdk_hooks.py
@@ -0,0 +1,138 @@
+"""Unit tests for Claude Agent SDK hooks."""
+
+import asyncio
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+
+from langsmith.integrations.claude_agent_sdk._hooks import (
+    _active_tool_runs,
+    _client_managed_runs,
+    post_tool_use_failure_hook,
+    post_tool_use_hook,
+    pre_tool_use_hook,
+)
+from langsmith.run_trees import RunTree
+
+ERROR_MSG = "Exit code 1\ncat: /nonexistent: No such file or directory"
+
+
+@pytest.fixture(autouse=True)
+def _clear_state():
+    """Empty both hook run registries before and after every test."""
+    for registry in (_active_tool_runs, _client_managed_runs):
+        registry.clear()
+    yield
+    for registry in (_active_tool_runs, _client_managed_runs):
+        registry.clear()
+
+
+def _make_parent_run() -> RunTree:
+    """Build a root "chain" run, backed by a MagicMock client, to act as parent."""
+    return RunTree(run_type="chain", name="test-parent", client=MagicMock())
+
+
+class TestToolUseSuccessFlow:
+    """A PreToolUse/PostToolUse pair opens and then completes one tool run."""
+
+    @pytest.fixture(autouse=True)
+    def _set_parent(self):
+        """Install a parent run for the test and remove it afterwards."""
+        from langsmith.integrations.claude_agent_sdk import _tools
+
+        _tools.set_parent_run_tree(_make_parent_run())
+        yield
+        _tools.clear_parent_run_tree()
+
+    def test_success_flow(self):
+        """Fire both hooks for one id and inspect the run they share."""
+        tool_use_id = "tu_1"
+
+        pre_payload = {
+            "tool_name": "Bash",
+            "tool_input": {"command": "echo hi"},
+        }
+        post_payload = {
+            "tool_name": "Bash",
+            "tool_response": {"output": "hi", "is_error": False},
+        }
+
+        # PreToolUse must register a tool run under the tool_use_id.
+        asyncio.run(pre_tool_use_hook(pre_payload, tool_use_id, MagicMock()))
+
+        assert tool_use_id in _active_tool_runs
+        started_run, _ = _active_tool_runs[tool_use_id]
+        assert started_run.name == "Bash"
+        assert started_run.run_type == "tool"
+        assert started_run.inputs == {"input": {"command": "echo hi"}}
+
+        # PostToolUse must drop the entry and store the response as outputs.
+        asyncio.run(post_tool_use_hook(post_payload, tool_use_id, MagicMock()))
+
+        assert tool_use_id not in _active_tool_runs
+        assert started_run.outputs == {"output": "hi", "is_error": False}
+        assert started_run.error is None
+
+
+class TestToolUseFailureFlow:
+    """A PreToolUse/PostToolUseFailure pair leaves one errored tool run."""
+
+    @pytest.fixture(autouse=True)
+    def _set_parent(self):
+        """Install a parent run for the test and remove it afterwards."""
+        from langsmith.integrations.claude_agent_sdk import _tools
+
+        _tools.set_parent_run_tree(_make_parent_run())
+        yield
+        _tools.clear_parent_run_tree()
+
+    def test_failure_flow(self):
+        """Fire PreToolUse, then the failure hook, for the same id."""
+        tool_use_id = "tu_2"
+
+        pre_payload = {
+            "tool_name": "Bash",
+            "tool_input": {"command": "cat /nonexistent"},
+        }
+        failure_payload = {
+            "tool_name": "Bash",
+            "tool_input": {"command": "cat /nonexistent"},
+            "error": ERROR_MSG,
+        }
+
+        asyncio.run(pre_tool_use_hook(pre_payload, tool_use_id, MagicMock()))
+
+        # Keep a handle on the run now; the failure hook removes the entry.
+        failed_run, _ = _active_tool_runs[tool_use_id]
+
+        asyncio.run(
+            post_tool_use_failure_hook(failure_payload, tool_use_id, MagicMock())
+        )
+
+        # The registry entry is gone and the error text is stored both as
+        # the run's error and in its outputs.
+        assert tool_use_id not in _active_tool_runs
+        assert failed_run.error == ERROR_MSG
+        assert failed_run.outputs == {"error": ERROR_MSG}
+
+
+class TestInjectTracingHooks:
+    """_inject_tracing_hooks registers one LangSmith matcher per hook event."""
+
+    def test_injects_all_three_hooks(self, monkeypatch):
+        from langsmith.integrations.claude_agent_sdk._client import (
+            _inject_tracing_hooks,
+        )
+
+        options = MagicMock()
+        options.hooks = None
+
+        # setitem is undone at teardown, so a genuinely imported SDK module
+        # is put back in sys.modules rather than being deleted.
+        monkeypatch.setitem(sys.modules, "claude_agent_sdk", MagicMock())
+        _inject_tracing_hooks(options)
+
+        for event in ("PreToolUse", "PostToolUse", "PostToolUseFailure"):
+            assert event in options.hooks
+            assert len(options.hooks[event]) == 1