Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b8899f7

Browse files
committed May 7, 2025
feat: Context handling in realtime
1 parent b205f83 commit b8899f7

File tree

4 files changed

+246
-26
lines changed

4 files changed

+246
-26
lines changed
 

‎docs/voice/pipeline.md

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ from agents.voice import (
103103
VoicePipelineConfig
104104
)
105105
from agents.voice.models.sdk_realtime import SDKRealtimeLLM
106+
from dataclasses import dataclass
107+
108+
# Define a simple context class for state management (optional)
109+
@dataclass
110+
class MyAppContext:
111+
"""Context for the voice assistant."""
112+
user_name: str = "User"
113+
interaction_count: int = 0
106114

107115
# Create the input, config, and model
108116
input_stream = StreamedAudioInput()
@@ -114,11 +122,15 @@ config = VoicePipelineConfig(
114122
)
115123
model = SDKRealtimeLLM(model_name="gpt-4o-realtime-preview")
116124

117-
# Create the pipeline with tools
125+
# Create an app context instance (optional)
126+
app_context = MyAppContext()
127+
128+
# Create the pipeline with tools and shared context
118129
pipeline = RealtimeVoicePipeline(
119130
model=model,
120131
tools=[get_weather, get_time],
121132
config=config,
133+
shared_context=app_context, # Optional: shared state for context-aware tools
122134
)
123135

124136
# Start the pipeline
@@ -147,6 +159,117 @@ while True:
147159
break
148160
```
149161

162+
### Using Shared Context with Tools
163+
164+
The `RealtimeVoicePipeline` supports passing a shared context object to tools, allowing them to access and modify shared state across multiple interactions. This is useful for building more complex voice applications that need to maintain state, such as:
165+
166+
- Tracking user preferences
167+
- Maintaining conversation history
168+
- Counting interactions
169+
- Storing user information
170+
171+
#### Setting up a shared context
172+
173+
To use shared context with tools:
174+
175+
1. Define a context class (typically a dataclass) to hold your application state
176+
2. Create an instance of this class
177+
3. Pass it to the `RealtimeVoicePipeline` using the `shared_context` parameter
178+
4. Create tools that accept a `RunContextWrapper[YourContextType]` as their first parameter
179+
180+
```python
181+
from dataclasses import dataclass
182+
from agents.run_context import RunContextWrapper
183+
from agents.tool import function_tool
184+
185+
# Define your context class
186+
@dataclass
187+
class MyAppContext:
188+
"""Context for the voice assistant."""
189+
user_name: str
190+
interaction_count: int = 0
191+
192+
# Create a context-aware tool
193+
@function_tool
194+
def greet_user_and_count(context: RunContextWrapper[MyAppContext]) -> str:
195+
"""Greets the user by name and counts interactions."""
196+
# Access and modify the context
197+
context.context.interaction_count += 1
198+
199+
return f"Hello {context.context.user_name}! This is interaction number {context.context.interaction_count}."
200+
201+
# Create another context-aware tool
202+
@function_tool
203+
def get_user_details(context: RunContextWrapper[MyAppContext]) -> dict:
204+
"""Gets user details from the context."""
205+
return {
206+
"user_name": context.context.user_name,
207+
"interaction_count": context.context.interaction_count
208+
}
209+
210+
# Create your application context
211+
app_context = MyAppContext(user_name="Alice", interaction_count=0)
212+
213+
# Create the pipeline with shared context
214+
pipeline = RealtimeVoicePipeline(
215+
model=model,
216+
tools=[get_weather, get_time, greet_user_and_count, get_user_details],
217+
config=config,
218+
shared_context=app_context, # Pass the context here
219+
)
220+
```
221+
222+
#### How it works
223+
224+
1. The `RealtimeVoicePipeline` passes the shared context to its internal `ToolExecutor`
225+
2. When the LLM calls a tool, the `ToolExecutor` checks whether the tool's first parameter is named `context`
226+
3. If it is, the executor wraps your context object in a `RunContextWrapper` and passes it to the tool
227+
4. The tool can then access and modify your context object via `context.context`
228+
5. Since all tools share the same context object, changes made by one tool are visible to other tools in future calls
229+
230+
This mechanism allows your tools to maintain shared state across turns and interactions in your voice application, without needing to set up a separate state management system.
231+
232+
#### Context-Aware vs. Standard Tools
233+
234+
You can mix both context-aware and standard tools in the same `RealtimeVoicePipeline`:
235+
236+
```python
237+
# A standard tool (no context parameter)
238+
@function_tool
239+
def get_weather(city: str) -> dict:
240+
"""Gets the weather for the specified city."""
241+
return {"temperature": 72, "condition": "sunny"}
242+
243+
# A context-aware tool (has context parameter)
244+
@function_tool
245+
def update_user_preference(context: RunContextWrapper[MyAppContext], preference: str, value: str) -> str:
246+
"""Updates a user preference in the context."""
247+
if not hasattr(context.context, "preferences"):
248+
context.context.preferences = {}
249+
context.context.preferences[preference] = value
250+
return f"Updated {preference} to {value}"
251+
```
252+
253+
**When to use standard tools:**
254+
255+
- For stateless operations that don't need to remember information between calls
256+
- For simple lookups or calculations based solely on the input parameters
257+
- When integration with external APIs or services doesn't require user-specific state
258+
259+
**When to use context-aware tools:**
260+
261+
- When tools need to access or modify shared state
262+
- For personalization features that adapt to the user
263+
- To implement features that track usage or interactions
264+
- When information gathered in one tool call needs to be available to another tool
265+
266+
**Important notes:**
267+
268+
- The first parameter of a context-aware tool must be named `context` and should have a type annotation of `RunContextWrapper[YourContextType]`
269+
- Type hints are recommended but not required; the parameter name `context` is sufficient for the tool to be detected as context-aware
270+
- The actual object inside `context.context` will be the instance you passed to `shared_context` when creating the pipeline
271+
- All context-aware tools see the same context instance, so changes are immediately visible to all tools
272+
150273
### Turn Detection Modes
151274

152275
The realtime models can operate in different turn detection modes, controlled via the `turn_detection` setting:

‎examples/voice/realtime_assistant.py

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@
1818
on applying for access to the realtime API.
1919
2020
Usage:
21-
python continuous_realtime_assistant.py
21+
python realtime_assistant.py
2222
"""
2323

2424
import asyncio
2525
import logging
2626
import os
2727
import time
2828
from typing import Dict, Any
29+
from dataclasses import dataclass
2930

3031
import numpy as np
3132
import sounddevice as sd # For microphone and speaker I/O
@@ -42,6 +43,7 @@
4243
)
4344
from agents.tool import function_tool, Tool
4445
from agents.voice.models.sdk_realtime import SDKRealtimeLLM
46+
from agents.run_context import RunContextWrapper
4547

4648
# Import the new event types from our SDK
4749
from agents.voice.realtime.model import (
@@ -60,6 +62,15 @@
6062
logger = logging.getLogger("realtime_assistant")
6163

6264

65+
# Define a dataclass for our application context
66+
@dataclass
67+
class MyAppContext:
68+
"""A simple context for the realtime voice assistant example."""
69+
70+
user_name: str
71+
interaction_count: int = 0
72+
73+
6374
# Define some sample tools
6475
@function_tool
6576
def get_weather(city: str) -> Dict[str, Any]:
@@ -75,6 +86,37 @@ def get_time(timezone: str = "UTC") -> Dict[str, Any]:
7586
return {"time": time.strftime("%H:%M:%S", time.gmtime()), "timezone": timezone}
7687

7788

89+
# Define a context-aware tool
90+
@function_tool
91+
def greet_user_and_count(context: RunContextWrapper[MyAppContext]) -> str:
92+
"""Greets the user by name and counts interactions."""
93+
logger.info(f"greet_user_and_count called with context: {context}")
94+
# Increment the interaction count
95+
context.context.interaction_count += 1
96+
97+
logger.info(
98+
f"Greeting user: {context.context.user_name}, "
99+
f"Interaction count: {context.context.interaction_count}"
100+
)
101+
102+
return f"Hello {context.context.user_name}! This is interaction number {context.context.interaction_count}."
103+
104+
105+
# Another context-aware tool that reads but doesn't modify the context
106+
@function_tool
107+
def get_user_details(context: RunContextWrapper[MyAppContext]) -> Dict[str, Any]:
108+
"""Gets the user's details from the context."""
109+
logger.info(f"get_user_details called with context: {context}")
110+
111+
logger.info(
112+
f"Returning user details: name={context.context.user_name}, count={context.context.interaction_count}"
113+
)
114+
return {
115+
"user_name": context.context.user_name,
116+
"interaction_count": context.context.interaction_count,
117+
}
118+
119+
78120
# Get the OpenAI API key from environment variables
79121
api_key = os.environ.get("OPENAI_API_KEY")
80122
if not api_key:
@@ -117,18 +159,22 @@ async def main():
117159
realtime_settings={
118160
"turn_detection": "server_vad", # Use server-side VAD
119161
"assistant_voice": "alloy",
120-
"system_message": "You are a helpful assistant that responds concisely.",
162+
"system_message": "You are a helpful assistant that responds concisely. You can use the greet_user_and_count tool to greet the user by name and the get_user_details tool to retrieve information about the user.",
121163
# Enable server-side noise / echo reduction
122164
"input_audio_noise_reduction": {},
123165
}
124166
)
125167
input_stream = StreamedAudioInput()
126168

127-
# Create the realtime pipeline
169+
# Create our application context
170+
app_context = MyAppContext(user_name="Anurag", interaction_count=0)
171+
172+
# Create the realtime pipeline with shared context
128173
pipeline = RealtimeVoicePipeline(
129174
model=model,
130-
tools=[get_weather, get_time],
175+
tools=[get_weather, get_time, greet_user_and_count, get_user_details],
131176
config=config,
177+
shared_context=app_context, # Pass the context to the pipeline
132178
)
133179

134180
# Track events and errors
@@ -321,6 +367,9 @@ async def toggle_push_to_talk_simulation():
321367

322368
logger.info(f"Total events processed: {event_count}")
323369

370+
# Print the final interaction count from the context
371+
logger.info(f"Final interaction count: {app_context.interaction_count}")
372+
324373
# Provide troubleshooting information if needed
325374
if error_occurred or event_count <= 1: # <=1 because turn_started is an event
326375
logger.error(f"Error occurred: {error_occurred}")

‎src/agents/voice/pipeline_realtime.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def __init__(
3737
model: RealtimeLLMModel | str | None = None,
3838
tools: Sequence[Tool] = (),
3939
config: VoicePipelineConfig | None = None,
40+
shared_context: Any | None = None,
4041
):
4142
"""Create a new real-time voice pipeline.
4243
@@ -45,6 +46,7 @@ def __init__(
4546
or a string identifier for a model from the provider.
4647
tools: A sequence of tools available to the LLM.
4748
config: The pipeline configuration. If not provided, a default will be used.
49+
shared_context: An optional context object that will be passed to tools when they are executed.
4850
"""
4951
if isinstance(model, str) or model is None:
5052
self._model_name_to_load: str | None = model
@@ -59,7 +61,8 @@ def __init__(
5961

6062
self._tools = tools
6163
self._config = config or VoicePipelineConfig()
62-
self._tool_executor = ToolExecutor(tools)
64+
self._shared_context = shared_context
65+
self._tool_executor = ToolExecutor(tools, shared_context=shared_context)
6366

6467
def _get_model(self) -> RealtimeLLMModel:
6568
"""Get the real-time LLM model to use."""
Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from __future__ import annotations
22

33
import json
4+
import inspect
45
from collections.abc import Sequence
5-
from typing import Any
6+
from typing import Any, get_type_hints, get_origin, Dict, Set
67

78
from ...exceptions import AgentsException, UserError
89
from ...logger import logger
@@ -17,11 +18,43 @@
1718
class ToolExecutor:
1819
"""Executes tools based on RealtimeEventToolCall events."""
1920

20-
def __init__(self, tools: Sequence[Tool]):
21-
self._tool_map: dict[str, FunctionTool] = {}
21+
def __init__(self, tools: Sequence[Tool], shared_context: Any | None = None):
22+
self._tool_map: Dict[str, FunctionTool] = {}
23+
self._shared_context = shared_context
24+
# Explicitly specify which tools need context - we'll set all tools with first param named "context"
25+
self._context_aware_tools: Set[str] = set()
26+
2227
for tool in tools:
2328
if isinstance(tool, FunctionTool):
2429
self._tool_map[tool.name] = tool
30+
31+
# Debug - log all attributes of the FunctionTool
32+
logger.info(f"FunctionTool {tool.name} attributes: {dir(tool)}")
33+
34+
# Get the original function if available
35+
if hasattr(tool, "function"):
36+
func = tool.function
37+
logger.info(f"Found function attribute for {tool.name}: {func}")
38+
if callable(func):
39+
# Check if first parameter is named "context" - simpler approach
40+
sig = inspect.signature(func)
41+
params = list(sig.parameters.keys())
42+
logger.info(f"Function {tool.name} params: {params}")
43+
if params and params[0] == "context":
44+
self._context_aware_tools.add(tool.name)
45+
logger.info(f"Detected context-aware tool: {tool.name}")
46+
else:
47+
# Try to inspect on_invoke_tool to see if we can find more info
48+
logger.info(
49+
f"Tool {tool.name} has no 'function' attribute. Examining on_invoke_tool: {tool.on_invoke_tool}"
50+
)
51+
52+
# Special hardcoded handling - for now, let's explicitly mark these tools as context-aware
53+
if tool.name in ["greet_user_and_count", "get_user_details"]:
54+
logger.info(
55+
f"Explicitly marking {tool.name} as context-aware based on name"
56+
)
57+
self._context_aware_tools.add(tool.name)
2558
else:
2659
# For now, only FunctionTools are supported by this simple executor.
2760
# We can extend this later if other tool types (e.g. ComputerTool) are needed
@@ -30,6 +63,8 @@ def __init__(self, tools: Sequence[Tool]):
3063
f"Tool '{tool.name}' is not a FunctionTool and will be ignored by ToolExecutor."
3164
)
3265

66+
logger.info(f"Context-aware tools: {self._context_aware_tools}")
67+
3368
async def execute(self, tool_call_event: RealtimeEventToolCall) -> str:
3469
"""Executes the specified tool and returns its string output.
3570
@@ -42,32 +77,42 @@ async def execute(self, tool_call_event: RealtimeEventToolCall) -> str:
4277
Raises:
4378
AgentsException: If the tool is not found or fails during execution.
4479
"""
45-
tool = self._tool_map.get(tool_call_event.tool_name)
80+
tool_name = tool_call_event.tool_name
81+
tool = self._tool_map.get(tool_name)
82+
4683
if not tool:
47-
err_msg = f"Tool '{tool_call_event.tool_name}' not found in ToolExecutor."
84+
err_msg = f"Tool '{tool_name}' not found in ToolExecutor."
4885
logger.error(err_msg)
4986
# Return an error string that can be sent back to the LLM
50-
return json.dumps(
51-
{"error": err_msg, "tool_name": tool_call_event.tool_name}
52-
)
87+
return json.dumps({"error": err_msg, "tool_name": tool_name})
5388

5489
# Convert arguments dict to JSON string, as expected by on_invoke_tool
5590
try:
5691
arguments_json = json.dumps(tool_call_event.arguments)
5792
except TypeError as e: # pragma: no cover
58-
err_msg = f"Failed to serialize arguments for tool '{tool.name}': {e}"
93+
err_msg = f"Failed to serialize arguments for tool '{tool_name}': {e}"
5994
logger.error(f"{err_msg} Arguments: {tool_call_event.arguments}")
60-
return json.dumps({"error": err_msg, "tool_name": tool.name})
95+
return json.dumps({"error": err_msg, "tool_name": tool_name})
6196

62-
logger.info(f"Executing tool: {tool.name} with args: {arguments_json}")
97+
logger.info(f"Executing tool: {tool_name} with args: {arguments_json}")
6398

6499
try:
65-
# Create an empty RunContextWrapper for now, as this executor is lightweight.
66-
# If context-dependent tools are needed, this might need to evolve or use a proper Runner.
67-
# The `on_invoke_tool` is expected to handle JSON string input.
68-
tool_output = await tool.on_invoke_tool(
69-
RunContextWrapper(context=None), arguments_json
70-
)
100+
# Check if this is a context-aware tool
101+
needs_context = tool_name in self._context_aware_tools
102+
103+
# Execute the tool with or without context
104+
if needs_context:
105+
logger.info(
106+
f"Tool {tool_name} is context-aware, passing RunContextWrapper"
107+
)
108+
tool_output = await tool.on_invoke_tool(
109+
RunContextWrapper(context=self._shared_context), arguments_json
110+
)
111+
else:
112+
logger.info(
113+
f"Tool {tool_name} is not context-aware, invoking without RunContextWrapper"
114+
)
115+
tool_output = await tool.on_invoke_tool(None, arguments_json)
71116

72117
# Ensure the output is a string (as expected by OpenAI tool result content)
73118
if not isinstance(tool_output, str):
@@ -80,10 +125,10 @@ async def execute(self, tool_call_event: RealtimeEventToolCall) -> str:
80125
tool_output_str = tool_output
81126

82127
logger.info(
83-
f"Tool {tool.name} executed successfully. Output length: {len(tool_output_str)}"
128+
f"Tool {tool_name} executed successfully. Output length: {len(tool_output_str)}"
84129
)
85130
return tool_output_str
86131
except Exception as e: # pragma: no cover
87-
logger.error(f"Error executing tool '{tool.name}': {e}", exc_info=True)
132+
logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
88133
# Return an error string that can be sent back to the LLM
89-
return json.dumps({"error": str(e), "tool_name": tool.name})
134+
return json.dumps({"error": str(e), "tool_name": tool_name})

0 commit comments

Comments
 (0)
Please sign in to comment.