Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 67 additions & 17 deletions examples/tools/computer_use.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
# How to run this example:
# uv run python -m playwright install chromium
# uv run -m examples.tools.computer_use

import asyncio
import base64
from typing import Literal, Union
import sys
from typing import Any, Literal, Union

from playwright.async_api import Browser, Page, Playwright, async_playwright

from agents import (
Agent,
AsyncComputer,
Button,
ComputerProvider,
ComputerTool,
Environment,
ModelSettings,
RunContextWrapper,
Runner,
trace,
)
Expand All @@ -21,21 +28,6 @@
# logging.getLogger("openai.agents").addHandler(logging.StreamHandler())


async def main():
async with LocalPlaywrightComputer() as computer:
with trace("Computer use example"):
agent = Agent(
name="Browser user",
instructions="You are a helpful agent.",
tools=[ComputerTool(computer)],
# Use the computer using model, and set truncation to auto because its required
model="computer-use-preview",
model_settings=ModelSettings(truncation="auto"),
)
result = await Runner.run(agent, "Search for SF sports news and summarize.")
print(result.final_output)


CUA_KEY_TO_PLAYWRIGHT_KEY = {
"/": "Divide",
"\\": "Backslash",
Expand Down Expand Up @@ -93,6 +85,16 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
await self._browser.close()
if self._playwright:
await self._playwright.stop()
return None

async def open(self) -> "LocalPlaywrightComputer":
"""Open resources without using a context manager."""
await self.__aenter__()
return self

async def close(self) -> None:
"""Close resources without using a context manager."""
await self.__aexit__(None, None, None)

@property
def playwright(self) -> Playwright:
Expand Down Expand Up @@ -164,5 +166,53 @@ async def drag(self, path: list[tuple[int, int]]) -> None:
await self.page.mouse.up()


async def run_agent(
computer_config: ComputerProvider[LocalPlaywrightComputer] | AsyncComputer,
) -> None:
with trace("Computer use example"):
agent = Agent(
name="Browser user",
instructions="You are a helpful agent. Find the current weather in Tokyo.",
tools=[ComputerTool(computer=computer_config)],
# Use the computer using model, and set truncation to auto because it is required.
model="computer-use-preview",
model_settings=ModelSettings(truncation="auto"),
)
result = await Runner.run(agent, "What is the weather in Tokyo right now?")
print(result.final_output)


async def singleton_computer() -> None:
# Use a shared computer when you do not expect to run multiple agents concurrently.
async with LocalPlaywrightComputer() as computer:
await run_agent(computer)


async def computer_per_request() -> None:
# Initialize a new computer per request to avoid sharing state between runs.
async def create_computer(*, run_context: RunContextWrapper[Any]) -> LocalPlaywrightComputer:
print(f"Creating computer for run context: {run_context}")
return await LocalPlaywrightComputer().open()

async def dispose_computer(
*,
run_context: RunContextWrapper[Any],
computer: LocalPlaywrightComputer,
) -> None:
print(f"Disposing computer for run context: {run_context}")
await computer.close()

await run_agent(
ComputerProvider[LocalPlaywrightComputer](
create=create_computer,
dispose=dispose_computer,
)
)


if __name__ == "__main__":
asyncio.run(main())
mode = (sys.argv[1] if len(sys.argv) > 1 else "").lower()
if mode == "singleton":
asyncio.run(singleton_computer())
else:
asyncio.run(computer_per_request())
6 changes: 6 additions & 0 deletions src/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
from .tool import (
ApplyPatchTool,
CodeInterpreterTool,
ComputerProvider,
ComputerTool,
FileSearchTool,
FunctionTool,
Expand Down Expand Up @@ -116,7 +117,9 @@
ToolOutputTextDict,
WebSearchTool,
default_tool_error_function,
dispose_resolved_computers,
function_tool,
resolve_computer,
)
from .tool_guardrails import (
ToolGuardrailFunctionOutput,
Expand Down Expand Up @@ -301,6 +304,7 @@ def enable_verbose_stdout_logging():
"FunctionTool",
"FunctionToolResult",
"ComputerTool",
"ComputerProvider",
"FileSearchTool",
"CodeInterpreterTool",
"ImageGenerationTool",
Expand Down Expand Up @@ -332,6 +336,8 @@ def enable_verbose_stdout_logging():
"ToolOutputFileContent",
"ToolOutputFileContentDict",
"function_tool",
"resolve_computer",
"dispose_resolved_computers",
"Usage",
"add_trace_processor",
"agent_span",
Expand Down
26 changes: 22 additions & 4 deletions src/agents/_run_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
ShellResult,
ShellTool,
Tool,
resolve_computer,
)
from .tool_context import ToolContext
from .tool_guardrails import (
Expand Down Expand Up @@ -159,7 +160,7 @@ class ToolRunFunction:
@dataclass
class ToolRunComputerAction:
tool_call: ResponseComputerToolCall
computer_tool: ComputerTool
computer_tool: ComputerTool[Any]


@dataclass
Expand Down Expand Up @@ -461,6 +462,22 @@ def maybe_reset_tool_choice(

return model_settings

@classmethod
async def initialize_computer_tools(
cls,
*,
tools: list[Tool],
context_wrapper: RunContextWrapper[TContext],
) -> None:
"""Resolve computer tools ahead of model invocation so each run gets its own instance."""
computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)]
if not computer_tools:
return

await asyncio.gather(
*(resolve_computer(tool=tool, run_context=context_wrapper) for tool in computer_tools)
)

@classmethod
def process_model_response(
cls,
Expand Down Expand Up @@ -1529,10 +1546,11 @@ async def execute(
config: RunConfig,
acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = None,
) -> RunItem:
computer = await resolve_computer(tool=action.computer_tool, run_context=context_wrapper)
output_func = (
cls._get_screenshot_async(action.computer_tool.computer, action.tool_call)
if isinstance(action.computer_tool.computer, AsyncComputer)
else cls._get_screenshot_sync(action.computer_tool.computer, action.tool_call)
cls._get_screenshot_async(computer, action.tool_call)
if isinstance(computer, AsyncComputer)
else cls._get_screenshot_sync(computer, action.tool_call)
)

_, _, output = await asyncio.gather(
Expand Down
14 changes: 11 additions & 3 deletions src/agents/models/openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from .. import _debug
from ..agent_output import AgentOutputSchemaBase
from ..computer import AsyncComputer, Computer
from ..exceptions import UserError
from ..handoffs import Handoff
from ..items import ItemHelpers, ModelResponse, TResponseInputItem
Expand Down Expand Up @@ -491,11 +492,18 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, ResponseIncludable | None

includes = "file_search_call.results" if tool.include_search_results else None
elif isinstance(tool, ComputerTool):
computer = tool.computer
if not isinstance(computer, (Computer, AsyncComputer)):
raise UserError(
"Computer tool is not initialized for serialization. Call "
"resolve_computer({ tool, run_context }) with a run context first "
"when building payloads manually."
)
converted_tool = {
"type": "computer_use_preview",
"environment": tool.computer.environment,
"display_width": tool.computer.dimensions[0],
"display_height": tool.computer.dimensions[1],
"environment": computer.environment,
"display_width": computer.dimensions[0],
"display_height": computer.dimensions[1],
}
includes = None
elif isinstance(tool, HostedMCPTool):
Expand Down
16 changes: 15 additions & 1 deletion src/agents/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
RunItemStreamEvent,
StreamEvent,
)
from .tool import Tool
from .tool import Tool, dispose_resolved_computers
from .tool_guardrails import ToolInputGuardrailResult, ToolOutputGuardrailResult
from .tracing import Span, SpanError, agent_span, get_current_trace, trace
from .tracing.span_data import AgentSpanData
Expand Down Expand Up @@ -600,6 +600,9 @@ async def run(
try:
while True:
all_tools = await AgentRunner._get_all_tools(current_agent, context_wrapper)
await RunImpl.initialize_computer_tools(
tools=all_tools, context_wrapper=context_wrapper
)

# Start an agent span if we don't have one. This span is ended if the current
# agent changes, or if the agent loop ends.
Expand Down Expand Up @@ -782,6 +785,10 @@ async def run(
)
raise
finally:
try:
await dispose_resolved_computers(run_context=context_wrapper)
except Exception as error:
logger.warning("Failed to dispose computers after run: %s", error)
if current_span:
current_span.finish(reset_current=True)

Expand Down Expand Up @@ -1113,6 +1120,9 @@ async def _start_streaming(
break

all_tools = await cls._get_all_tools(current_agent, context_wrapper)
await RunImpl.initialize_computer_tools(
tools=all_tools, context_wrapper=context_wrapper
)

# Start an agent span if we don't have one. This span is ended if the current
# agent changes, or if the agent loop ends.
Expand Down Expand Up @@ -1323,6 +1333,10 @@ async def _start_streaming(
logger.debug(
f"Error in streamed_result finalize for agent {current_agent.name} - {e}"
)
try:
await dispose_resolved_computers(run_context=context_wrapper)
except Exception as error:
logger.warning("Failed to dispose computers after streamed run: %s", error)
if current_span:
current_span.finish(reset_current=True)
if streamed_result.trace:
Expand Down
2 changes: 1 addition & 1 deletion src/agents/run_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
TContext = TypeVar("TContext", default=Any)


@dataclass
@dataclass(eq=False)
class RunContextWrapper(Generic[TContext]):
"""This wraps the context object that you passed to `Runner.run()`. It also contains
information about the usage of the agent run so far.
Expand Down
Loading