From 367081ddefd5fdca831a958eabf799ecb2715814 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Tue, 16 Dec 2025 17:22:02 +0900 Subject: [PATCH 1/2] fix: Enable creating/disposing Computer per agent run --- examples/tools/computer_use.py | 78 +++++++-- src/agents/__init__.py | 3 + src/agents/_run_impl.py | 26 ++- src/agents/models/openai_responses.py | 14 +- src/agents/run.py | 16 +- src/agents/run_context.py | 2 +- src/agents/tool.py | 207 ++++++++++++++++++++++- tests/test_computer_tool_lifecycle.py | 140 +++++++++++++++ tests/test_openai_responses_converter.py | 80 +++++---- 9 files changed, 498 insertions(+), 68 deletions(-) create mode 100644 tests/test_computer_tool_lifecycle.py diff --git a/examples/tools/computer_use.py b/examples/tools/computer_use.py index 0c17cf959..7de11e854 100644 --- a/examples/tools/computer_use.py +++ b/examples/tools/computer_use.py @@ -1,6 +1,11 @@ +# How to run this example: +# uv run python -m playwright install chromium +# uv run -m examples.tools.computer_use + import asyncio import base64 -from typing import Literal, Union +import sys +from typing import Any, Literal, Union from playwright.async_api import Browser, Page, Playwright, async_playwright @@ -8,9 +13,11 @@ Agent, AsyncComputer, Button, + ComputerProvider, ComputerTool, Environment, ModelSettings, + RunContextWrapper, Runner, trace, ) @@ -21,19 +28,48 @@ # logging.getLogger("openai.agents").addHandler(logging.StreamHandler()) -async def main(): +async def run_agent(computer_config: ComputerProvider[Any, AsyncComputer] | AsyncComputer) -> None: + with trace("Computer use example"): + agent = Agent( + name="Browser user", + instructions="You are a helpful agent. Find the current weather in Tokyo.", + tools=[ComputerTool(computer=computer_config)], + # Use the computer using model, and set truncation to auto because it is required. + model="computer-use-preview", + model_settings=ModelSettings(truncation="auto"), + ) + result = await Runner.run(agent, "What is the weather in Tokyo right now?") + print(result.final_output) + + +async def singleton_computer() -> None: + # Use a shared computer when you do not expect to run multiple agents concurrently. async with LocalPlaywrightComputer() as computer: - with trace("Computer use example"): - agent = Agent( - name="Browser user", - instructions="You are a helpful agent.", - tools=[ComputerTool(computer)], - # Use the computer using model, and set truncation to auto because its required - model="computer-use-preview", - model_settings=ModelSettings(truncation="auto"), - ) - result = await Runner.run(agent, "Search for SF sports news and summarize.") - print(result.final_output) + await run_agent(computer) + + +async def computer_per_request() -> None: + # Initialize a new computer per request to avoid sharing state between runs. + async def create_computer( + *, run_context: RunContextWrapper[Any] + ) -> LocalPlaywrightComputer: + print(f"Creating computer for run context: {run_context}") + return await LocalPlaywrightComputer().open() + + async def dispose_computer( + *, + run_context: RunContextWrapper[Any], + computer: LocalPlaywrightComputer, + ) -> None: + print(f"Disposing computer for run context: {run_context}") + await computer.close() + + await run_agent( + ComputerProvider( + create=create_computer, + dispose=dispose_computer, + ) + ) CUA_KEY_TO_PLAYWRIGHT_KEY = { @@ -93,6 +129,16 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): await self._browser.close() if self._playwright: await self._playwright.stop() + return None + + async def open(self) -> "LocalPlaywrightComputer": + """Open resources without using a context manager.""" + await self.__aenter__() + return self + + async def close(self) -> None: + """Close resources without using a context manager.""" + await self.__aexit__(None, None, None) @property def playwright(self) -> Playwright: @@ -165,4 +211,8 @@ async def drag(self, path: list[tuple[int, int]]) -> None: if __name__ == "__main__": - asyncio.run(main()) + mode = (sys.argv[1] if len(sys.argv) > 1 else "").lower() + if mode == "singleton": + asyncio.run(singleton_computer()) + else: + asyncio.run(computer_per_request()) diff --git a/src/agents/__init__.py b/src/agents/__init__.py index 00a5ca21e..f4a8bddc1 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -88,6 +88,7 @@ ApplyPatchTool, CodeInterpreterTool, ComputerTool, + ComputerProvider, FileSearchTool, FunctionTool, FunctionToolResult, @@ -116,7 +117,9 @@ ToolOutputTextDict, WebSearchTool, default_tool_error_function, + dispose_resolved_computers, function_tool, + resolve_computer, ) from .tool_guardrails import ( ToolGuardrailFunctionOutput, diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 48e8eebdf..599e1d321 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -95,6 +95,7 @@ ShellResult, ShellTool, Tool, + resolve_computer, ) from .tool_context import ToolContext from .tool_guardrails import ( @@ -461,6 +462,22 @@ def maybe_reset_tool_choice( return model_settings + @classmethod + async def initialize_computer_tools( + cls, + *, + tools: list[Tool], + context_wrapper: RunContextWrapper[TContext], + ) -> None: + """Resolve computer tools ahead of model invocation so each run gets its own instance.""" + computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)] + if not computer_tools: + return + + await asyncio.gather( + *(resolve_computer(tool=tool, run_context=context_wrapper) for tool in computer_tools) + ) + @classmethod def process_model_response( cls, @@ -1529,10 +1546,13 @@ async def execute( config: RunConfig, acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = None, ) -> RunItem: + computer = await resolve_computer( + tool=action.computer_tool, run_context=context_wrapper + ) output_func = ( - cls._get_screenshot_async(action.computer_tool.computer, action.tool_call) - if isinstance(action.computer_tool.computer, AsyncComputer) - else cls._get_screenshot_sync(action.computer_tool.computer, action.tool_call) + cls._get_screenshot_async(computer, action.tool_call) + if isinstance(computer, AsyncComputer) + else cls._get_screenshot_sync(computer, action.tool_call) ) _, _, output = await asyncio.gather( diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index a8695c89c..73092e4a4 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -21,6 +21,7 @@ from .. import _debug from ..agent_output import AgentOutputSchemaBase +from ..computer import AsyncComputer, Computer from ..exceptions import UserError from ..handoffs import Handoff from ..items import ItemHelpers, ModelResponse, TResponseInputItem @@ -491,11 +492,18 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, ResponseIncludable | None includes = "file_search_call.results" if tool.include_search_results else None elif isinstance(tool, ComputerTool): + computer = tool.computer + if not isinstance(computer, (Computer, AsyncComputer)): + raise UserError( + "Computer tool is not initialized for serialization. Call " + "resolve_computer({ tool, run_context }) with a run context first " + "when building payloads manually." + ) converted_tool = { "type": "computer_use_preview", - "environment": tool.computer.environment, - "display_width": tool.computer.dimensions[0], - "display_height": tool.computer.dimensions[1], + "environment": computer.environment, + "display_width": computer.dimensions[0], + "display_height": computer.dimensions[1], } includes = None elif isinstance(tool, HostedMCPTool): diff --git a/src/agents/run.py b/src/agents/run.py index e772b254e..f6707b33b 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -71,7 +71,7 @@ RunItemStreamEvent, StreamEvent, ) -from .tool import Tool +from .tool import Tool, dispose_resolved_computers from .tool_guardrails import ToolInputGuardrailResult, ToolOutputGuardrailResult from .tracing import Span, SpanError, agent_span, get_current_trace, trace from .tracing.span_data import AgentSpanData @@ -600,6 +600,9 @@ async def run( try: while True: all_tools = await AgentRunner._get_all_tools(current_agent, context_wrapper) + await RunImpl.initialize_computer_tools( + tools=all_tools, context_wrapper=context_wrapper + ) # Start an agent span if we don't have one. This span is ended if the current # agent changes, or if the agent loop ends. @@ -782,6 +785,10 @@ async def run( ) raise finally: + try: + await dispose_resolved_computers(run_context=context_wrapper) + except Exception as error: + logger.warning("Failed to dispose computers after run: %s", error) if current_span: current_span.finish(reset_current=True) @@ -1113,6 +1120,9 @@ async def _start_streaming( break all_tools = await cls._get_all_tools(current_agent, context_wrapper) + await RunImpl.initialize_computer_tools( + tools=all_tools, context_wrapper=context_wrapper + ) # Start an agent span if we don't have one. This span is ended if the current # agent changes, or if the agent loop ends. @@ -1323,6 +1333,10 @@ async def _start_streaming( logger.debug( f"Error in streamed_result finalize for agent {current_agent.name} - {e}" ) + try: + await dispose_resolved_computers(run_context=context_wrapper) + except Exception as error: + logger.warning("Failed to dispose computers after streamed run: %s", error) if current_span: current_span.finish(reset_current=True) if streamed_result.trace: diff --git a/src/agents/run_context.py b/src/agents/run_context.py index 579a215f2..19934366b 100644 --- a/src/agents/run_context.py +++ b/src/agents/run_context.py @@ -8,7 +8,7 @@ TContext = TypeVar("TContext", default=Any) -@dataclass +@dataclass(eq=False) class RunContextWrapper(Generic[TContext]): """This wraps the context object that you passed to `Runner.run()`. It also contains information about the usage of the agent run so far. diff --git a/src/agents/tool.py b/src/agents/tool.py index 499a84045..078de6245 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -2,9 +2,20 @@ import inspect import json +import weakref from collections.abc import Awaitable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Callable, Literal, Union, overload +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Generic, + Literal, + Protocol, + TypeVar, + Union, + overload, +) from openai.types.responses.file_search_tool_param import Filters, RankingOptions from openai.types.responses.response_computer_tool_call import ( @@ -21,7 +32,7 @@ from . import _debug from .computer import AsyncComputer, Computer from .editor import ApplyPatchEditor -from .exceptions import ModelBehaviorError +from .exceptions import ModelBehaviorError, UserError from .function_schema import DocstringStyle, function_schema from .logger import logger from .run_context import RunContextWrapper @@ -129,6 +140,45 @@ class ToolOutputFileContentDict(TypedDict, total=False): ValidToolOutputPydanticModels ) +ComputerLike = Computer | AsyncComputer +ContextT = TypeVar("ContextT") +ComputerT = TypeVar("ComputerT", Computer, AsyncComputer) + + +class ComputerCreate(Protocol[ContextT, ComputerT]): + """Initializes a computer for the current run context.""" + + def __call__( + self, *, run_context: RunContextWrapper[ContextT] + ) -> MaybeAwaitable[ComputerT]: + ... + + +class ComputerDispose(Protocol[ContextT, ComputerT]): + """Cleans up a computer initialized for a run context.""" + + def __call__( + self, *, + run_context: RunContextWrapper[ContextT], + computer: ComputerT, + ) -> MaybeAwaitable[None]: + ... + + +@dataclass +class ComputerProvider(Generic[ContextT, ComputerT]): + """Configures create/dispose hooks for per-run computer lifecycle management.""" + + create: ComputerCreate[ContextT, ComputerT] + dispose: ComputerDispose[ContextT, ComputerT] | None = None + + +ComputerConfig = ( + ComputerLike + | ComputerCreate[ContextT, ComputerT] + | ComputerProvider[ContextT, ComputerT] +) + @dataclass class FunctionToolResult: @@ -237,23 +287,162 @@ def name(self): return "web_search" -@dataclass -class ComputerTool: +@dataclass(eq=False) +class ComputerTool(Generic[ContextT, ComputerT]): """A hosted tool that lets the LLM control a computer.""" - computer: Computer | AsyncComputer - """The computer implementation, which describes the environment and dimensions of the computer, - as well as implements the computer actions like click, screenshot, etc. - """ + computer: ComputerConfig[ContextT, ComputerT] + """The computer implementation, or a factory that produces a computer per run.""" on_safety_check: Callable[[ComputerToolSafetyCheckData], MaybeAwaitable[bool]] | None = None """Optional callback to acknowledge computer tool safety checks.""" + def __post_init__(self) -> None: + _store_computer_initializer(self) + @property def name(self): return "computer_use_preview" +@dataclass +class _ResolvedComputer(Generic[ContextT]): + computer: ComputerLike + dispose: ComputerDispose[ContextT, ComputerLike] | None = None + + +_computer_cache: weakref.WeakKeyDictionary[ + ComputerTool[Any, ComputerLike], + weakref.WeakKeyDictionary[RunContextWrapper[Any], _ResolvedComputer[Any]], +] = weakref.WeakKeyDictionary() +_computer_initializer_map: weakref.WeakKeyDictionary[ + ComputerTool[Any, ComputerLike], ComputerConfig[Any, ComputerLike] +] = weakref.WeakKeyDictionary() +_computers_by_run_context: weakref.WeakKeyDictionary[ + RunContextWrapper[Any], dict[ComputerTool[Any, ComputerLike], _ResolvedComputer[Any]] +] = weakref.WeakKeyDictionary() + + +def _is_computer_provider(candidate: object) -> bool: + return isinstance(candidate, ComputerProvider) or ( + hasattr(candidate, "create") and callable(getattr(candidate, "create")) + ) + + +def _store_computer_initializer(tool: ComputerTool[Any, ComputerLike]) -> None: + config = tool.computer + if callable(config) or _is_computer_provider(config): + _computer_initializer_map[tool] = config + + +def _get_computer_initializer( + tool: ComputerTool[ContextT, ComputerLike] +) -> ComputerConfig[ContextT, ComputerLike] | None: + if tool in _computer_initializer_map: + return _computer_initializer_map[tool] + + if callable(tool.computer) or _is_computer_provider(tool.computer): + return tool.computer + + return None + + +def _track_resolved_computer( + *, + tool: ComputerTool[Any, ComputerLike], + run_context: RunContextWrapper[Any], + resolved: _ResolvedComputer[Any], +) -> None: + resolved_by_run = _computers_by_run_context.get(run_context) + if resolved_by_run is None: + resolved_by_run = {} + _computers_by_run_context[run_context] = resolved_by_run + resolved_by_run[tool] = resolved + + +async def resolve_computer( + *, + tool: ComputerTool[ContextT, ComputerLike], + run_context: RunContextWrapper[ContextT], +) -> ComputerLike: + """Resolve a computer for a given run context, initializing it if needed.""" + per_context = _computer_cache.get(tool) + if per_context is None: + per_context = weakref.WeakKeyDictionary() + _computer_cache[tool] = per_context + + cached = per_context.get(run_context) + if cached is not None: + _track_resolved_computer(tool=tool, run_context=run_context, resolved=cached) + return cached.computer + + initializer_config = _get_computer_initializer(tool) + lifecycle = ( + initializer_config if _is_computer_provider(initializer_config) else None # type: ignore[arg-type] + ) + initializer = None + disposer = lifecycle.dispose if lifecycle else None + + if lifecycle: + initializer = lifecycle.create + elif callable(initializer_config): + initializer = initializer_config + elif _is_computer_provider(tool.computer): + lifecycle = tool.computer # type: ignore[assignment] + initializer = lifecycle.create + disposer = lifecycle.dispose + + if initializer: + computer_candidate = initializer(run_context=run_context) + computer = ( + await computer_candidate + if inspect.isawaitable(computer_candidate) + else computer_candidate + ) + else: + computer = tool.computer # type: ignore[assignment] + + if not isinstance(computer, (Computer, AsyncComputer)): + raise UserError("The computer tool did not provide a computer instance.") + + resolved = _ResolvedComputer(computer=computer, dispose=disposer) + per_context[run_context] = resolved + _track_resolved_computer(tool=tool, run_context=run_context, resolved=resolved) + tool.computer = computer # type: ignore[assignment] + return computer + + +async def dispose_resolved_computers( + *, run_context: RunContextWrapper[ContextT] +) -> None: + """Dispose any computer instances created for the provided run context.""" + resolved_by_tool = _computers_by_run_context.pop(run_context, None) + if not resolved_by_tool: + return + + disposers: list[tuple[ComputerDispose[ContextT, ComputerLike], ComputerLike]] = [] + + for tool, resolved in resolved_by_tool.items(): + per_context = _computer_cache.get(tool) + if per_context is not None: + per_context.pop(run_context, None) + + initializer = _get_computer_initializer(tool) + if initializer is not None: + tool.computer = initializer # type: ignore[assignment] + + if resolved.dispose is not None: + disposers.append((resolved.dispose, resolved.computer)) + + for dispose, computer in disposers: + try: + result = dispose(run_context=run_context, computer=computer) + if inspect.isawaitable(result): + await result + except Exception as exc: + logger.warning("Failed to dispose computer for run context: %s", exc) + + @dataclass class ComputerToolSafetyCheckData: """Information about a computer tool safety check.""" @@ -473,7 +662,7 @@ def type(self) -> str: FunctionTool, FileSearchTool, WebSearchTool, - ComputerTool, + ComputerTool[Any, ComputerLike], HostedMCPTool, ShellTool, ApplyPatchTool, diff --git a/tests/test_computer_tool_lifecycle.py b/tests/test_computer_tool_lifecycle.py new file mode 100644 index 000000000..6a1e626d3 --- /dev/null +++ b/tests/test_computer_tool_lifecycle.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock + +import pytest +from openai.types.responses import ResponseOutputMessage, ResponseOutputText + +from agents import ( + Agent, + ComputerProvider, + ComputerTool, + RunContextWrapper, + Runner, + dispose_resolved_computers, + resolve_computer, +) +from agents.computer import Button, Computer, Environment +from tests.fake_model import FakeModel + + +class FakeComputer(Computer): + def __init__(self, label: str = "computer") -> None: + self.label = label + + @property + def environment(self) -> Environment: + return "mac" + + @property + def dimensions(self) -> tuple[int, int]: + return (1, 1) + + def screenshot(self) -> str: + return "img" + + def click(self, x: int, y: int, button: Button) -> None: + return None + + def double_click(self, x: int, y: int) -> None: + return None + + def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + return None + + def type(self, text: str) -> None: + return None + + def wait(self) -> None: + return None + + def move(self, x: int, y: int) -> None: + return None + + def keypress(self, keys: list[str]) -> None: + return None + + def drag(self, path: list[tuple[int, int]]) -> None: + return None + + +def _make_message(text: str) -> ResponseOutputMessage: + return ResponseOutputMessage( + id="msg-1", + content=[ResponseOutputText(annotations=[], text=text, type="output_text")], + role="assistant", + status="completed", + type="message", + ) + + +@pytest.mark.asyncio +async def test_resolve_computer_per_run_context() -> None: + counter = 0 + + async def create_computer(*_: Any, **__: Any) -> FakeComputer: + nonlocal counter + counter += 1 + return FakeComputer(label=f"computer-{counter}") + + tool = ComputerTool(computer=create_computer) + ctx_a = RunContextWrapper(context=None) + ctx_b = RunContextWrapper(context=None) + + comp_a1 = await resolve_computer(tool=tool, run_context=ctx_a) + comp_a2 = await resolve_computer(tool=tool, run_context=ctx_a) + comp_b1 = await resolve_computer(tool=tool, run_context=ctx_b) + + assert comp_a1 is comp_a2 + assert comp_a1 is not comp_b1 + assert tool.computer is comp_b1 + assert counter == 2 + + await dispose_resolved_computers(run_context=ctx_a) + comp_a3 = await resolve_computer(tool=tool, run_context=ctx_a) + + assert comp_a3 is not comp_a1 + assert counter == 3 + await dispose_resolved_computers(run_context=ctx_b) + await dispose_resolved_computers(run_context=ctx_a) + + +@pytest.mark.asyncio +async def test_runner_disposes_computer_after_run() -> None: + created = FakeComputer("created") + create = AsyncMock(return_value=created) + dispose = AsyncMock() + + tool = ComputerTool(computer=ComputerProvider(create=create, dispose=dispose)) + model = FakeModel(initial_output=[_make_message("done")]) + agent = Agent(name="ComputerAgent", model=model, tools=[tool]) + + result = await Runner.run(agent, "hello") + + assert result.final_output == "done" + create.assert_awaited_once() + dispose.assert_awaited_once() + dispose.assert_awaited_with(run_context=result.context_wrapper, computer=created) + + +@pytest.mark.asyncio +async def test_streamed_run_disposes_computer_after_completion() -> None: + created = FakeComputer("streaming") + create = AsyncMock(return_value=created) + dispose = AsyncMock() + + tool = ComputerTool(computer=ComputerProvider(create=create, dispose=dispose)) + model = FakeModel(initial_output=[_make_message("done")]) + agent = Agent(name="ComputerAgent", model=model, tools=[tool]) + + streamed_result = Runner.run_streamed(agent, "hello") + async for _ in streamed_result.stream_events(): + pass + + assert streamed_result.final_output == "done" + create.assert_awaited_once() + dispose.assert_awaited_once() + dispose.assert_awaited_with( + run_context=streamed_result.context_wrapper, computer=created + ) diff --git a/tests/test_openai_responses_converter.py b/tests/test_openai_responses_converter.py index f0ae2e816..62e058e00 100644 --- a/tests/test_openai_responses_converter.py +++ b/tests/test_openai_responses_converter.py @@ -43,6 +43,43 @@ from agents.models.openai_responses import Converter +class DummyComputer(Computer): + @property + def environment(self): + return "mac" + + @property + def dimensions(self): + return (800, 600) + + def screenshot(self) -> str: + raise NotImplementedError + + def click(self, x: int, y: int, button: str) -> None: + raise NotImplementedError + + def double_click(self, x: int, y: int) -> None: + raise NotImplementedError + + def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + raise NotImplementedError + + def type(self, text: str) -> None: + raise NotImplementedError + + def wait(self) -> None: + raise NotImplementedError + + def move(self, x: int, y: int) -> None: + raise NotImplementedError + + def keypress(self, keys: list[str]) -> None: + raise NotImplementedError + + def drag(self, path: list[tuple[int, int]]) -> None: + raise NotImplementedError + + def test_convert_tool_choice_standard_values(): """ Make sure that the standard tool_choice values map to themselves or @@ -110,43 +147,6 @@ def test_convert_tools_basic_types_and_includes(): # Web search tool with custom params web_tool = WebSearchTool(user_location=None, search_context_size="high") - # Dummy computer tool subclassing the Computer ABC with minimal methods. - class DummyComputer(Computer): - @property - def environment(self): - return "mac" - - @property - def dimensions(self): - return (800, 600) - - def screenshot(self) -> str: - raise NotImplementedError - - def click(self, x: int, y: int, button: str) -> None: - raise NotImplementedError - - def double_click(self, x: int, y: int) -> None: - raise NotImplementedError - - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - raise NotImplementedError - - def type(self, text: str) -> None: - raise NotImplementedError - - def wait(self) -> None: - raise NotImplementedError - - def move(self, x: int, y: int) -> None: - raise NotImplementedError - - def keypress(self, keys: list[str]) -> None: - raise NotImplementedError - - def drag(self, path: list[tuple[int, int]]) -> None: - raise NotImplementedError - # Wrap our concrete computer in a ComputerTool for conversion. comp_tool = ComputerTool(computer=DummyComputer()) tools: list[Tool] = [tool_fn, file_tool, web_tool, comp_tool] @@ -203,3 +203,9 @@ def test_convert_tools_includes_handoffs(): assert handoff_tool.get("description") == Handoff.default_tool_description(agent) # No includes for handoffs by default. assert converted.includes == [] + + +def test_convert_tools_requires_initialized_computer(): + comp_tool = ComputerTool(computer=lambda **_: DummyComputer()) + with pytest.raises(UserError, match="resolve_computer"): + Converter.convert_tools(tools=[comp_tool], handoffs=[]) From 3169a616ecc02e5b97ad0ff06d9bcb7090dd4c2b Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Tue, 16 Dec 2025 17:48:36 +0900 Subject: [PATCH 2/2] fix; --- examples/tools/computer_use.py | 88 +++++++++---------- src/agents/__init__.py | 5 +- src/agents/_run_impl.py | 6 +- src/agents/tool.py | 117 ++++++++++++-------------- tests/test_computer_tool_lifecycle.py | 8 +- 5 files changed, 109 insertions(+), 115 deletions(-) diff --git a/examples/tools/computer_use.py b/examples/tools/computer_use.py index 7de11e854..d59c21ba4 100644 --- a/examples/tools/computer_use.py +++ b/examples/tools/computer_use.py @@ -28,50 +28,6 @@ # logging.getLogger("openai.agents").addHandler(logging.StreamHandler()) -async def run_agent(computer_config: ComputerProvider[Any, AsyncComputer] | AsyncComputer) -> None: - with trace("Computer use example"): - agent = Agent( - name="Browser user", - instructions="You are a helpful agent. Find the current weather in Tokyo.", - tools=[ComputerTool(computer=computer_config)], - # Use the computer using model, and set truncation to auto because it is required. - model="computer-use-preview", - model_settings=ModelSettings(truncation="auto"), - ) - result = await Runner.run(agent, "What is the weather in Tokyo right now?") - print(result.final_output) - - -async def singleton_computer() -> None: - # Use a shared computer when you do not expect to run multiple agents concurrently. - async with LocalPlaywrightComputer() as computer: - await run_agent(computer) - - -async def computer_per_request() -> None: - # Initialize a new computer per request to avoid sharing state between runs. - async def create_computer( - *, run_context: RunContextWrapper[Any] - ) -> LocalPlaywrightComputer: - print(f"Creating computer for run context: {run_context}") - return await LocalPlaywrightComputer().open() - - async def dispose_computer( - *, - run_context: RunContextWrapper[Any], - computer: LocalPlaywrightComputer, - ) -> None: - print(f"Disposing computer for run context: {run_context}") - await computer.close() - - await run_agent( - ComputerProvider( - create=create_computer, - dispose=dispose_computer, - ) - ) - - CUA_KEY_TO_PLAYWRIGHT_KEY = { "/": "Divide", "\\": "Backslash", @@ -210,6 +166,50 @@ async def drag(self, path: list[tuple[int, int]]) -> None: await self.page.mouse.up() +async def run_agent( + computer_config: ComputerProvider[LocalPlaywrightComputer] | AsyncComputer, +) -> None: + with trace("Computer use example"): + agent = Agent( + name="Browser user", + instructions="You are a helpful agent. Find the current weather in Tokyo.", + tools=[ComputerTool(computer=computer_config)], + # Use the computer using model, and set truncation to auto because it is required. + model="computer-use-preview", + model_settings=ModelSettings(truncation="auto"), + ) + result = await Runner.run(agent, "What is the weather in Tokyo right now?") + print(result.final_output) + + +async def singleton_computer() -> None: + # Use a shared computer when you do not expect to run multiple agents concurrently. + async with LocalPlaywrightComputer() as computer: + await run_agent(computer) + + +async def computer_per_request() -> None: + # Initialize a new computer per request to avoid sharing state between runs. + async def create_computer(*, run_context: RunContextWrapper[Any]) -> LocalPlaywrightComputer: + print(f"Creating computer for run context: {run_context}") + return await LocalPlaywrightComputer().open() + + async def dispose_computer( + *, + run_context: RunContextWrapper[Any], + computer: LocalPlaywrightComputer, + ) -> None: + print(f"Disposing computer for run context: {run_context}") + await computer.close() + + await run_agent( + ComputerProvider[LocalPlaywrightComputer]( + create=create_computer, + dispose=dispose_computer, + ) + ) + + if __name__ == "__main__": mode = (sys.argv[1] if len(sys.argv) > 1 else "").lower() if mode == "singleton": diff --git a/src/agents/__init__.py b/src/agents/__init__.py index f4a8bddc1..882323157 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -87,8 +87,8 @@ from .tool import ( ApplyPatchTool, CodeInterpreterTool, - ComputerTool, ComputerProvider, + ComputerTool, FileSearchTool, FunctionTool, FunctionToolResult, @@ -304,6 +304,7 @@ def enable_verbose_stdout_logging(): "FunctionTool", "FunctionToolResult", "ComputerTool", + "ComputerProvider", "FileSearchTool", "CodeInterpreterTool", "ImageGenerationTool", @@ -335,6 +336,8 @@ def enable_verbose_stdout_logging(): "ToolOutputFileContent", "ToolOutputFileContentDict", "function_tool", + "resolve_computer", + "dispose_resolved_computers", "Usage", "add_trace_processor", "agent_span", diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 599e1d321..4b6b6df64 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -160,7 +160,7 @@ class ToolRunFunction: @dataclass class ToolRunComputerAction: tool_call: ResponseComputerToolCall - computer_tool: ComputerTool + computer_tool: ComputerTool[Any] @dataclass @@ -1546,9 +1546,7 @@ async def execute( config: RunConfig, acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = None, ) -> RunItem: - computer = await resolve_computer( - tool=action.computer_tool, run_context=context_wrapper - ) + computer = await resolve_computer(tool=action.computer_tool, run_context=context_wrapper) output_func = ( cls._get_screenshot_async(computer, action.tool_call) if isinstance(computer, AsyncComputer) diff --git a/src/agents/tool.py b/src/agents/tool.py index 078de6245..8c8d3e988 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -14,6 +14,7 @@ Protocol, TypeVar, Union, + cast, overload, ) @@ -140,44 +141,42 @@ class ToolOutputFileContentDict(TypedDict, total=False): ValidToolOutputPydanticModels ) -ComputerLike = Computer | AsyncComputer -ContextT = TypeVar("ContextT") -ComputerT = TypeVar("ComputerT", Computer, AsyncComputer) +ComputerLike = Union[Computer, AsyncComputer] +ComputerT = TypeVar("ComputerT", bound=ComputerLike) +ComputerT_co = TypeVar("ComputerT_co", bound=ComputerLike, covariant=True) +ComputerT_contra = TypeVar("ComputerT_contra", bound=ComputerLike, contravariant=True) -class ComputerCreate(Protocol[ContextT, ComputerT]): +class ComputerCreate(Protocol[ComputerT_co]): """Initializes a computer for the current run context.""" - def __call__( - self, *, run_context: RunContextWrapper[ContextT] - ) -> MaybeAwaitable[ComputerT]: - ... + def __call__(self, *, run_context: RunContextWrapper[Any]) -> MaybeAwaitable[ComputerT_co]: ... -class ComputerDispose(Protocol[ContextT, ComputerT]): +class ComputerDispose(Protocol[ComputerT_contra]): """Cleans up a computer initialized for a run context.""" def __call__( - self, *, - run_context: RunContextWrapper[ContextT], - computer: ComputerT, - ) -> MaybeAwaitable[None]: - ... + self, + *, + run_context: RunContextWrapper[Any], + computer: ComputerT_contra, + ) -> MaybeAwaitable[None]: ... @dataclass -class ComputerProvider(Generic[ContextT, ComputerT]): +class ComputerProvider(Generic[ComputerT]): """Configures create/dispose hooks for per-run computer lifecycle management.""" - create: ComputerCreate[ContextT, ComputerT] - dispose: ComputerDispose[ContextT, ComputerT] | None = None + create: ComputerCreate[ComputerT] + dispose: ComputerDispose[ComputerT] | None = None -ComputerConfig = ( - ComputerLike - | ComputerCreate[ContextT, ComputerT] - | ComputerProvider[ContextT, ComputerT] -) +ComputerConfig = Union[ + ComputerT, + ComputerCreate[ComputerT], + ComputerProvider[ComputerT], +] @dataclass @@ -288,10 +287,10 @@ def name(self): @dataclass(eq=False) -class ComputerTool(Generic[ContextT, ComputerT]): +class ComputerTool(Generic[ComputerT]): """A hosted tool that lets the LLM control a computer.""" - computer: ComputerConfig[ContextT, ComputerT] + computer: ComputerConfig[ComputerT] """The computer implementation, or a factory that produces a computer per run.""" on_safety_check: Callable[[ComputerToolSafetyCheckData], MaybeAwaitable[bool]] | None = None @@ -306,38 +305,36 @@ def name(self): @dataclass -class _ResolvedComputer(Generic[ContextT]): +class _ResolvedComputer: computer: ComputerLike - dispose: ComputerDispose[ContextT, ComputerLike] | None = None + dispose: ComputerDispose[ComputerLike] | None = None _computer_cache: weakref.WeakKeyDictionary[ - ComputerTool[Any, ComputerLike], - weakref.WeakKeyDictionary[RunContextWrapper[Any], _ResolvedComputer[Any]], -] = weakref.WeakKeyDictionary() -_computer_initializer_map: weakref.WeakKeyDictionary[ - ComputerTool[Any, ComputerLike], ComputerConfig[Any, ComputerLike] + ComputerTool[Any], + weakref.WeakKeyDictionary[RunContextWrapper[Any], _ResolvedComputer], ] = weakref.WeakKeyDictionary() +_computer_initializer_map: weakref.WeakKeyDictionary[ComputerTool[Any], ComputerConfig[Any]] = ( + weakref.WeakKeyDictionary() +) _computers_by_run_context: weakref.WeakKeyDictionary[ - RunContextWrapper[Any], dict[ComputerTool[Any, ComputerLike], _ResolvedComputer[Any]] + RunContextWrapper[Any], dict[ComputerTool[Any], _ResolvedComputer] ] = weakref.WeakKeyDictionary() def _is_computer_provider(candidate: object) -> bool: return isinstance(candidate, ComputerProvider) or ( - hasattr(candidate, "create") and callable(getattr(candidate, "create")) + hasattr(candidate, "create") and callable(candidate.create) ) -def _store_computer_initializer(tool: ComputerTool[Any, ComputerLike]) -> None: +def _store_computer_initializer(tool: ComputerTool[Any]) -> None: config = tool.computer if callable(config) or _is_computer_provider(config): _computer_initializer_map[tool] = config -def _get_computer_initializer( - tool: ComputerTool[ContextT, ComputerLike] -) -> ComputerConfig[ContextT, ComputerLike] | None: +def _get_computer_initializer(tool: ComputerTool[Any]) -> ComputerConfig[Any] | None: if tool in _computer_initializer_map: return _computer_initializer_map[tool] @@ -349,9 +346,9 @@ def _get_computer_initializer( def _track_resolved_computer( *, - tool: ComputerTool[Any, ComputerLike], + tool: ComputerTool[Any], run_context: RunContextWrapper[Any], - resolved: _ResolvedComputer[Any], + resolved: _ResolvedComputer, ) -> None: resolved_by_run = _computers_by_run_context.get(run_context) if resolved_by_run is None: @@ -361,9 +358,7 @@ def _track_resolved_computer( async def resolve_computer( - *, - tool: ComputerTool[ContextT, ComputerLike], - run_context: RunContextWrapper[ContextT], + *, tool: ComputerTool[Any], run_context: RunContextWrapper[Any] ) -> ComputerLike: """Resolve a computer for a given run context, initializing it if needed.""" per_context = _computer_cache.get(tool) @@ -377,20 +372,22 @@ async def resolve_computer( return cached.computer initializer_config = _get_computer_initializer(tool) - lifecycle = ( - initializer_config if _is_computer_provider(initializer_config) else None # type: ignore[arg-type] + lifecycle: ComputerProvider[Any] | None = ( + cast(ComputerProvider[Any], initializer_config) + if _is_computer_provider(initializer_config) + else None ) - initializer = None - disposer = lifecycle.dispose if lifecycle else None + initializer: ComputerCreate[Any] | None = None + disposer: ComputerDispose[Any] | None = lifecycle.dispose if lifecycle else None - if lifecycle: + if lifecycle is not None: initializer = lifecycle.create elif callable(initializer_config): initializer = initializer_config elif _is_computer_provider(tool.computer): - lifecycle = tool.computer # type: ignore[assignment] - initializer = lifecycle.create - disposer = lifecycle.dispose + lifecycle_provider = cast(ComputerProvider[Any], tool.computer) + initializer = lifecycle_provider.create + disposer = lifecycle_provider.dispose if initializer: computer_candidate = initializer(run_context=run_context) @@ -400,7 +397,7 @@ async def resolve_computer( else computer_candidate ) else: - computer = tool.computer # type: ignore[assignment] + computer = cast(ComputerLike, tool.computer) if not isinstance(computer, (Computer, AsyncComputer)): raise UserError("The computer tool did not provide a computer instance.") @@ -408,31 +405,29 @@ async def resolve_computer( resolved = _ResolvedComputer(computer=computer, dispose=disposer) per_context[run_context] = resolved _track_resolved_computer(tool=tool, run_context=run_context, resolved=resolved) - tool.computer = computer # type: ignore[assignment] + tool.computer = computer return computer -async def dispose_resolved_computers( - *, run_context: RunContextWrapper[ContextT] -) -> None: +async def dispose_resolved_computers(*, run_context: RunContextWrapper[Any]) -> None: """Dispose any computer instances created for the provided run context.""" resolved_by_tool = _computers_by_run_context.pop(run_context, None) if not resolved_by_tool: return - disposers: list[tuple[ComputerDispose[ContextT, ComputerLike], ComputerLike]] = [] + disposers: list[tuple[ComputerDispose[ComputerLike], ComputerLike]] = [] - for tool, resolved in resolved_by_tool.items(): + for tool, _resolved in resolved_by_tool.items(): per_context = _computer_cache.get(tool) if per_context is not None: per_context.pop(run_context, None) initializer = _get_computer_initializer(tool) if initializer is not None: - tool.computer = initializer # type: ignore[assignment] + tool.computer = initializer - if resolved.dispose is not None: - disposers.append((resolved.dispose, resolved.computer)) + if _resolved.dispose is not None: + disposers.append((_resolved.dispose, _resolved.computer)) for dispose, computer in disposers: try: @@ -662,7 +657,7 @@ def type(self) -> str: FunctionTool, FileSearchTool, WebSearchTool, - ComputerTool[Any, ComputerLike], + ComputerTool[Any], HostedMCPTool, ShellTool, ApplyPatchTool, diff --git a/tests/test_computer_tool_lifecycle.py b/tests/test_computer_tool_lifecycle.py index 6a1e626d3..258a8588b 100644 --- a/tests/test_computer_tool_lifecycle.py +++ b/tests/test_computer_tool_lifecycle.py @@ -106,7 +106,7 @@ async def test_runner_disposes_computer_after_run() -> None: create = AsyncMock(return_value=created) dispose = AsyncMock() - tool = ComputerTool(computer=ComputerProvider(create=create, dispose=dispose)) + tool = ComputerTool(computer=ComputerProvider[FakeComputer](create=create, dispose=dispose)) model = FakeModel(initial_output=[_make_message("done")]) agent = Agent(name="ComputerAgent", model=model, tools=[tool]) @@ -124,7 +124,7 @@ async def test_streamed_run_disposes_computer_after_completion() -> None: create = AsyncMock(return_value=created) dispose = AsyncMock() - tool = ComputerTool(computer=ComputerProvider(create=create, dispose=dispose)) + tool = ComputerTool(computer=ComputerProvider[FakeComputer](create=create, dispose=dispose)) model = FakeModel(initial_output=[_make_message("done")]) agent = Agent(name="ComputerAgent", model=model, tools=[tool]) @@ -135,6 +135,4 @@ async def test_streamed_run_disposes_computer_after_completion() -> None: assert streamed_result.final_output == "done" create.assert_awaited_once() dispose.assert_awaited_once() - dispose.assert_awaited_with( - run_context=streamed_result.context_wrapper, computer=created - ) + dispose.assert_awaited_with(run_context=streamed_result.context_wrapper, computer=created)