diff --git a/agents/executive_assistant/__init__.py b/agents/executive_assistant/__init__.py
new file mode 100644
index 000000000..a3f3a107e
--- /dev/null
+++ b/agents/executive_assistant/__init__.py
@@ -0,0 +1,44 @@
+# mypy: ignore-errors
+"""Executive Assistant agent.
+
+Defines ``executive_assistant_agent`` (calendar and e-mail tools) and
+``ExecutiveAssistantState``, a container for short-term memory, file-backed
+long-term memory and a simple substring retriever.
+
+NOTE(review): nothing in this module wires the memory stores, the retriever or
+any voice model into the agent; confirm where that integration is meant to live.
+"""
+
+from __future__ import annotations
+
+from agents import Agent
+
+from .memory import LongTermMemory, ShortTermMemory
+from .rag import Retriever
+from .tools import get_calendar_events, send_email
+
+
+class ExecutiveAssistantState:
+    """Hold the memory stores and retriever used by the Executive Assistant.
+
+    Args:
+        memory_path: JSON file backing the long-term memory store.
+    """
+
+    def __init__(self, memory_path: str = "memory.json") -> None:
+        self.short_memory = ShortTermMemory()
+        self.long_memory = LongTermMemory(memory_path)
+        self.retriever = Retriever()
+
+
+executive_assistant_agent = Agent(
+    name="ExecutiveAssistant",
+    instructions=(
+        "You are an executive assistant. Use the available tools to help the user. "
+        "Remember important facts during the conversation for later retrieval."
+    ),
+    model="gpt-4o-mini",
+    tools=[get_calendar_events, send_email],
+)
+
+__all__ = ["ExecutiveAssistantState", "executive_assistant_agent"]
diff --git a/agents/executive_assistant/memory.py b/agents/executive_assistant/memory.py
new file mode 100644
index 000000000..5b2867181
--- /dev/null
+++ b/agents/executive_assistant/memory.py
@@ -0,0 +1,68 @@
+"""Short-term and long-term memory stores for the Executive Assistant."""
+
+from __future__ import annotations
+
+import json
+from collections import deque
+from pathlib import Path
+from typing import Any
+
+
+class ShortTermMemory:
+    """Bounded in-memory store for recent conversation turns.
+
+    Args:
+        max_messages: Number of most recent messages to retain.
+    """
+
+    def __init__(self, max_messages: int = 20) -> None:
+        # A bounded deque drops the oldest turn instead of growing forever;
+        # to_list() never exposed more than the most recent messages anyway.
+        self._messages: deque[dict[str, str]] = deque(maxlen=max_messages)
+
+    def add(self, role: str, content: str) -> None:
+        """Add a message to memory, evicting the oldest one when full."""
+        self._messages.append({"role": role, "content": content})
+
+    def to_list(self) -> list[dict[str, str]]:
+        """Return the retained messages, oldest first, as a new list."""
+        return list(self._messages)
+
+
+class LongTermMemory:
+    """Simple JSON-file-backed memory store.
+
+    Args:
+        path: Location of the JSON file; it is written on every ``add``.
+
+    Raises:
+        ValueError: If the file exists but does not hold a JSON list.
+    """
+
+    def __init__(self, path: str | Path) -> None:
+        self._path = Path(path)
+        self._data: list[Any] = []
+        try:
+            raw = self._path.read_text(encoding="utf-8")
+        except FileNotFoundError:
+            # No file yet: start empty; the first add() creates it.
+            return
+        loaded = json.loads(raw)
+        if not isinstance(loaded, list):
+            raise ValueError(f"{self._path} must contain a JSON list")
+        self._data = loaded
+
+    def add(self, item: Any) -> None:
+        """Persist an item to disk.
+
+        Raises:
+            TypeError: If ``item`` is not JSON serializable; nothing is stored.
+        """
+        # Serialize before mutating so a bad item cannot poison later writes.
+        serialized = json.dumps([*self._data, item])
+        self._path.write_text(serialized, encoding="utf-8")
+        self._data.append(item)
+
+    def all(self) -> list[Any]:
+        """Return a shallow copy of all persisted items."""
+        return list(self._data)
diff --git a/agents/executive_assistant/rag.py b/agents/executive_assistant/rag.py
new file mode 100644
index 000000000..c009fffae
--- /dev/null
+++ b/agents/executive_assistant/rag.py
@@ -0,0 +1,28 @@
+"""Minimal in-memory document retriever for the Executive Assistant."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+
+class Retriever:
+    """Very small RAG retriever stub backed by a list of strings.
+
+    Args:
+        corpus: Optional initial documents; copied into an internal list.
+    """
+
+    def __init__(self, corpus: Iterable[str] | None = None) -> None:
+        self._corpus = list(corpus or [])
+
+    def add(self, document: str) -> None:
+        """Add a document to the corpus."""
+        self._corpus.append(document)
+
+    def search(self, query: str) -> list[str]:
+        """Return documents containing ``query``, ignoring case.
+
+        An empty query matches every document.
+        """
+        needle = query.lower()  # lowered once, not once per document
+        return [doc for doc in self._corpus if needle in doc.lower()]
diff --git a/agents/executive_assistant/tools.py b/agents/executive_assistant/tools.py
new file mode 100644
index 000000000..1964edaa4
--- /dev/null
+++ b/agents/executive_assistant/tools.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+from agents import function_tool
+
+
+@function_tool
+def get_calendar_events(date: str) -> str:
+    """Retrieve calendar events for a given date."""
+    # TODO: Integrate with calendar API.
+    return f"No events found for {date}."
+
+
+@function_tool
+def send_email(recipient: str, subject: str, body: str) -> str:
+    """Send a simple email."""
+    # TODO: Integrate with email service.
+    return "Email sent."
diff --git a/agents/pyproject.toml b/agents/pyproject.toml
new file mode 100644
index 000000000..8693da800
--- /dev/null
+++ b/agents/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "custom-agents"
+version = "0.0.0"
+requires-python = ">=3.9"
+
+[tool.hatch.build.targets.wheel]
+packages = ["executive_assistant"]
diff --git a/src/agents/voice/__init__.py b/src/agents/voice/__init__.py
index e11ee4467..abaa98db7 100644
--- a/src/agents/voice/__init__.py
+++ b/src/agents/voice/__init__.py
@@ -10,6 +10,9 @@
     TTSVoice,
     VoiceModelProvider,
 )
+from .models.deepgram_model_provider import DeepgramVoiceModelProvider
+from .models.deepgram_stt import DeepgramSTTModel
+from .models.deepgram_tts import DeepgramTTSModel
 from .models.openai_model_provider import OpenAIVoiceModelProvider
 from .models.openai_stt import OpenAISTTModel, OpenAISTTTranscriptionSession
 from .models.openai_tts import OpenAITTSModel
@@ -38,6 +41,9 @@
     "OpenAIVoiceModelProvider",
     "OpenAISTTModel",
     "OpenAITTSModel",
+    "DeepgramVoiceModelProvider",
+    "DeepgramSTTModel",
+    "DeepgramTTSModel",
     "VoiceStreamEventAudio",
     "VoiceStreamEventLifecycle",
     "VoiceStreamEvent",
diff --git a/src/agents/voice/models/deepgram_model_provider.py b/src/agents/voice/models/deepgram_model_provider.py
new file
mode 100644
index 000000000..cd9e9e198
--- /dev/null
+++ b/src/agents/voice/models/deepgram_model_provider.py
@@ -0,0 +1,43 @@
+"""Deepgram implementation of the voice model provider interface."""
+
+from __future__ import annotations
+
+import httpx  # type: ignore
+
+from ..model import STTModel, TTSModel, VoiceModelProvider
+from .deepgram_stt import DeepgramSTTModel
+from .deepgram_tts import DeepgramTTSModel
+
+DEFAULT_STT_MODEL = "nova-3"
+DEFAULT_TTS_MODEL = "aura-2"
+
+
+class DeepgramVoiceModelProvider(VoiceModelProvider):
+    """Voice model provider for Deepgram APIs.
+
+    Args:
+        api_key: Deepgram API key passed to every model this provider creates.
+        client: Optional HTTP client to share; one is created lazily if omitted.
+    """
+
+    def __init__(self, api_key: str, *, client: httpx.AsyncClient | None = None) -> None:
+        self._api_key = api_key
+        self._client = client
+
+    def _get_client(self) -> httpx.AsyncClient:
+        """Return the shared HTTP client, creating it on first use."""
+        if self._client is None:
+            self._client = httpx.AsyncClient()
+        return self._client
+
+    def get_stt_model(self, model_name: str | None) -> STTModel:
+        """Return a Deepgram STT model, defaulting to ``DEFAULT_STT_MODEL``."""
+        return DeepgramSTTModel(
+            model_name or DEFAULT_STT_MODEL, self._api_key, client=self._get_client()
+        )
+
+    def get_tts_model(self, model_name: str | None) -> TTSModel:
+        """Return a Deepgram TTS model, defaulting to ``DEFAULT_TTS_MODEL``."""
+        return DeepgramTTSModel(
+            model_name or DEFAULT_TTS_MODEL, self._api_key, client=self._get_client()
+        )
diff --git a/src/agents/voice/models/deepgram_stt.py b/src/agents/voice/models/deepgram_stt.py
new file mode 100644
index 000000000..e2f633d83
--- /dev/null
+++ b/src/agents/voice/models/deepgram_stt.py
@@ -0,0 +1,79 @@
+"""Deepgram speech-to-text model."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import httpx  # type: ignore
+
+from ..input import AudioInput, StreamedAudioInput
+from ..model import StreamedTranscriptionSession, STTModel, STTModelSettings
+
+DEEPGRAM_LISTEN_URL = "https://api.deepgram.com/v1/listen"
+
+
+class DeepgramSTTModel(STTModel):
+    """Speech-to-text model using Deepgram Nova 3.
+
+    Args:
+        model: Deepgram model name sent with each request.
+        api_key: Deepgram API key used for the ``Authorization`` header.
+        client: Optional HTTP client; a new one is created if omitted.
+    """
+
+    def __init__(
+        self, model: str, api_key: str, *, client: httpx.AsyncClient | None = None
+    ) -> None:
+        self.model = model
+        self.api_key = api_key
+        self._client = client or httpx.AsyncClient()
+
+    @property
+    def model_name(self) -> str:
+        """Name of the Deepgram model this instance uses."""
+        return self.model
+
+    async def transcribe(
+        self,
+        input: AudioInput,
+        settings: STTModelSettings,
+        trace_include_sensitive_data: bool,
+        trace_include_sensitive_audio_data: bool,
+    ) -> str:
+        """Transcribe ``input`` and return the first transcript, or ``""``.
+
+        ``settings`` and the two tracing flags are accepted for interface
+        compatibility but are not used by this implementation.
+
+        Raises:
+            httpx.HTTPStatusError: If Deepgram answers with a 4xx/5xx status.
+        """
+        _filename, audio, content_type = input.to_audio_file()
+        response = await self._client.post(
+            DEEPGRAM_LISTEN_URL,
+            # Passed as params so httpx URL-encodes the model name.
+            params={"model": self.model},
+            headers={
+                "Authorization": f"Token {self.api_key}",
+                "Content-Type": content_type,
+            },
+            content=audio.getvalue(),
+        )
+        # Fail loudly instead of turning an error body into an empty transcript.
+        response.raise_for_status()
+        payload: dict[str, Any] = response.json()
+        # ``or`` fallbacks also cover keys that are present but empty or null.
+        results = payload.get("results") or {}
+        channels = results.get("channels") or [{}]
+        alternatives = channels[0].get("alternatives") or [{}]
+        return alternatives[0].get("transcript", "")
+
+    async def create_session(
+        self,
+        input: StreamedAudioInput,
+        settings: STTModelSettings,
+        trace_include_sensitive_data: bool,
+        trace_include_sensitive_audio_data: bool,
+    ) -> StreamedTranscriptionSession:
+        """Streaming is not supported; always raises ``NotImplementedError``."""
+        raise NotImplementedError("Streaming transcription is not implemented.")
diff --git a/src/agents/voice/models/deepgram_tts.py b/src/agents/voice/models/deepgram_tts.py
new file mode 100644
index 000000000..b035f35d4
--- /dev/null
+++ b/src/agents/voice/models/deepgram_tts.py
@@ -0,0 +1,47 @@
+"""Deepgram text-to-speech model."""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+
+import httpx  # type: ignore
+
+from ..model import TTSModel, TTSModelSettings
+
+
+class DeepgramTTSModel(TTSModel):
+    """Text-to-speech model using Deepgram Aura 2.
+
+    Args:
+        model: Deepgram model name sent with each request.
+        api_key: Deepgram API key used for the ``Authorization`` header.
+        client: Optional HTTP client; a new one is created if omitted.
+    """
+
+    def __init__(
+        self, model: str, api_key: str, *, client: httpx.AsyncClient | None = None
+    ) -> None:
+        self.model = model
+        self.api_key = api_key
+        self._client = client or httpx.AsyncClient()
+
+    @property
+    def model_name(self) -> str:
+        """Name of the Deepgram model this instance uses."""
+        return self.model
+
+    async def run(self, text: str, settings: TTSModelSettings) -> AsyncIterator[bytes]:
+        """Synthesize ``text`` and yield the response body as a single chunk.
+
+        Raises:
+            httpx.HTTPStatusError: If Deepgram answers with a 4xx/5xx status.
+        """
+        url = "https://api.deepgram.com/v1/speak"
+        headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "application/json"}
+        # NOTE(review): Deepgram's docs show the model as a ``model`` query
+        # parameter; confirm that sending it (and ``voice``) in the body works.
+        payload = {"text": text, "model": self.model, "voice": settings.voice or "aura-2"}
+        response = await self._client.post(url, headers=headers, json=payload)
+        # Never hand an error body to the caller as if it were audio bytes.
+        response.raise_for_status()
+        yield response.content
diff --git
a/tests/executive_assistant/test_agent.py b/tests/executive_assistant/test_agent.py
new file mode 100644
index 000000000..b8e76e95d
--- /dev/null
+++ b/tests/executive_assistant/test_agent.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+import pytest
+
+from agents import Agent, Runner
+from agents.executive_assistant import executive_assistant_agent
+from tests.fake_model import FakeModel
+
+
+@pytest.mark.asyncio
+async def test_agent_runs_with_fake_model() -> None:
+    """The assistant's name, instructions and tools run against a fake model."""
+    model = FakeModel()
+    agent = Agent(
+        name=executive_assistant_agent.name,
+        instructions=executive_assistant_agent.instructions,
+        tools=executive_assistant_agent.tools,
+        model=model,
+    )
+    model.set_next_output(
+        [
+            {"role": "assistant", "content": "Hello"},
+        ]
+    )
+
+    # Runner.run() returns a run result, not a ToolsToFinalOutputResult, so the
+    # type is left to inference rather than annotated incorrectly.
+    result = await Runner.run(agent, "hi")
+    assert result.final_output == "Hello"
diff --git a/tests/voice/test_deepgram_models.py b/tests/voice/test_deepgram_models.py
new file mode 100644
index 000000000..0b961a42f
--- /dev/null
+++ b/tests/voice/test_deepgram_models.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+import httpx  # type: ignore
+import pytest
+
+from agents.voice import DeepgramVoiceModelProvider
+
+
+@pytest.mark.asyncio
+async def test_provider_returns_models() -> None:
+    """Default model names are used when no name is requested."""
+    # Inject the client so it is closed when the test ends instead of leaking.
+    async with httpx.AsyncClient() as client:
+        provider = DeepgramVoiceModelProvider(api_key="key", client=client)
+        stt = provider.get_stt_model(None)
+        tts = provider.get_tts_model(None)
+        assert stt.model_name == "nova-3"
+        assert tts.model_name == "aura-2"