From e70d52ab12c686b7d4f57baa036a202cdc36db80 Mon Sep 17 00:00:00 2001 From: Vamshi Balanaga Date: Thu, 11 Dec 2025 20:50:05 -0800 Subject: [PATCH 1/2] Add support for responses API; maintain backward compatability --- openevolve/config.py | 12 + openevolve/llm/openai.py | 117 ++++++++- pyproject.toml | 2 +- tests/test_openai_model_detection.py | 87 ++++++ tests/test_reasoning_effort_config.py | 51 +++- tests/test_responses_api.py | 363 ++++++++++++++++++++++++++ 6 files changed, 623 insertions(+), 9 deletions(-) create mode 100644 tests/test_responses_api.py diff --git a/openevolve/config.py b/openevolve/config.py index 543874496..0611992f5 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -78,6 +78,13 @@ class LLMModelConfig: # Reasoning parameters reasoning_effort: Optional[str] = None + # API type selection: "auto" (default), "responses", or "chat_completions" + # - "auto": Use Responses API for OpenAI endpoints, Chat Completions for others + # - "responses": Force use of OpenAI Responses API + # - "chat_completions": Force use of Chat Completions API + # None means inherit from parent config (defaults to "auto") + api_type: Optional[str] = None + def __post_init__(self): """Post-initialization to resolve ${VAR} env var references in api_key""" self.api_key = _resolve_env_var(self.api_key) @@ -116,6 +123,9 @@ class LLMConfig(LLMModelConfig): # Reasoning parameters (inherited from LLMModelConfig but can be overridden) reasoning_effort: Optional[str] = None + # API type for LLM level (defaults to "auto" for auto-detection) + api_type: str = "auto" + def __post_init__(self): """Post-initialization to set up model configurations""" super().__post_init__() # Resolve ${VAR} in api_key at LLMConfig level @@ -170,6 +180,7 @@ def __post_init__(self): "retry_delay": self.retry_delay, "random_seed": self.random_seed, "reasoning_effort": self.reasoning_effort, + "api_type": self.api_type, } self.update_model_params(shared_config) @@ -223,6 +234,7 @@ def rebuild_models(self) -> None: "retry_delay": self.retry_delay, "random_seed": self.random_seed, "reasoning_effort": self.reasoning_effort, + "api_type": self.api_type, } self.update_model_params(shared_config) diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py index 48cd81f96..e7f1b7059 100644 --- a/openevolve/llm/openai.py +++ b/openevolve/llm/openai.py @@ -34,6 +34,7 @@ def __init__( self.api_key = model_cfg.api_key self.random_seed = getattr(model_cfg, "random_seed", None) self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None) + self.api_type = getattr(model_cfg, "api_type", "auto") # Set up API client # OpenAI client requires max_retries to be int, not None @@ -45,6 +46,9 @@ def __init__( max_retries=max_retries, ) + # Determine which API to use (Responses API vs Chat Completions) + self.use_responses_api = self._should_use_responses_api() + # Only log unique models to reduce duplication if not hasattr(logger, "_initialized_models"): logger._initialized_models = set() @@ -53,6 +57,39 @@ def __init__( logger.info(f"Initialized OpenAI LLM with model: {self.model}") logger._initialized_models.add(self.model) + def _should_use_responses_api(self) -> bool: + """ + Determine if the Responses API should be used instead of Chat Completions. + + The Responses API is only available on OpenAI's official endpoints. + For other providers (OpenRouter, Google AI Studio, local servers, etc.), + we must use the Chat Completions API for compatibility. 
+ + Returns: + True if Responses API should be used, False for Chat Completions + """ + # Normalize api_type (None defaults to "auto") + api_type = self.api_type if self.api_type is not None else "auto" + + # Check for explicit override + if api_type == "responses": + return True + if api_type == "chat_completions": + return False + + # Auto-detect based on API base URL + if not self.api_base: + return False + + api_lower = self.api_base.lower() + + # Only use Responses API for official OpenAI endpoints + return ( + api_lower.startswith("https://api.openai.com") or + api_lower.startswith("https://eu.api.openai.com") or + api_lower.startswith("https://apac.api.openai.com") + ) + async def generate(self, prompt: str, **kwargs) -> str: """Generate text from a prompt""" return await self.generate_with_context( @@ -159,14 +196,82 @@ async def generate_with_context( raise async def _call_api(self, params: Dict[str, Any]) -> str: - """Make the actual API call""" + """Make the actual API call, dispatching to appropriate API""" # Use asyncio to run the blocking API call in a thread pool loop = asyncio.get_event_loop() - response = await loop.run_in_executor( - None, lambda: self.client.chat.completions.create(**params) - ) + + if self.use_responses_api: + response = await loop.run_in_executor( + None, lambda: self._call_responses_api(params) + ) + response_text = response.output_text + else: + response = await loop.run_in_executor( + None, lambda: self.client.chat.completions.create(**params) + ) + response_text = response.choices[0].message.content + # Logging of system prompt, user message and response content logger = logging.getLogger(__name__) logger.debug(f"API parameters: {params}") - logger.debug(f"API response: {response.choices[0].message.content}") - return response.choices[0].message.content + logger.debug(f"API response: {response_text}") + return response_text + + def _call_responses_api(self, chat_params: Dict[str, Any]) -> Any: + """ + Convert Chat Completions params to Responses API format and make the call. 
+ + The Responses API uses a different parameter structure: + - 'messages' -> 'input' (can be array of messages) + - System message in 'messages' -> 'instructions' parameter + - 'max_tokens'/'max_completion_tokens' -> 'max_output_tokens' + - 'reasoning_effort' -> 'reasoning: {"effort": ...}' + + Args: + chat_params: Parameters in Chat Completions format + + Returns: + Response object from client.responses.create() + """ + messages = chat_params["messages"] + + # Extract system message as instructions, keep other messages as input + instructions = None + input_messages = [] + for msg in messages: + if msg["role"] == "system": + instructions = msg["content"] + else: + input_messages.append(msg) + + # Build Responses API params + resp_params = { + "model": chat_params["model"], + "input": input_messages, + } + + if instructions: + resp_params["instructions"] = instructions + + # Map token limits (Responses API uses max_output_tokens) + if "max_completion_tokens" in chat_params: + resp_params["max_output_tokens"] = chat_params["max_completion_tokens"] + elif "max_tokens" in chat_params: + resp_params["max_output_tokens"] = chat_params["max_tokens"] + + # Map sampling parameters + if "temperature" in chat_params: + resp_params["temperature"] = chat_params["temperature"] + if "top_p" in chat_params: + resp_params["top_p"] = chat_params["top_p"] + if "seed" in chat_params: + resp_params["seed"] = chat_params["seed"] + + # Map reasoning_effort to nested format for Responses API + if "reasoning_effort" in chat_params: + resp_params["reasoning"] = {"effort": chat_params["reasoning_effort"]} + + # Disable conversation storage (not needed for OpenEvolve's use case) + resp_params["store"] = False + + return self.client.responses.create(**resp_params) diff --git a/pyproject.toml b/pyproject.toml index 8bf564feb..a43c57621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ authors = [ {name = "codelion"} ] dependencies = [ - "openai>=1.0.0", + "openai>=1.80.0", # Required for Responses API "pyyaml>=6.0", "numpy>=1.22.0", "tqdm>=4.64.0", diff --git a/tests/test_openai_model_detection.py b/tests/test_openai_model_detection.py index c8665abd0..0247b2c4d 100644 --- a/tests/test_openai_model_detection.py +++ b/tests/test_openai_model_detection.py @@ -94,5 +94,92 @@ def is_reasoning_model(model_name, api_base): ) +class TestResponsesAPIDetection(unittest.TestCase): + """Test Responses API vs Chat Completions API selection logic""" + + def _should_use_responses_api(self, api_base, api_type="auto"): + """Test function that mimics the logic in openai.py""" + # Check for explicit override + if api_type == "responses": + return True + if api_type == "chat_completions": + return False + + # Auto-detect based on API base URL + if not api_base: + return False + + api_lower = api_base.lower() + + # Only use Responses API for official OpenAI endpoints + return ( + api_lower.startswith("https://api.openai.com") or + api_lower.startswith("https://eu.api.openai.com") or + api_lower.startswith("https://apac.api.openai.com") + ) + + def test_openai_endpoints_use_responses_api(self): + """Test that official OpenAI endpoints use Responses API by default""" + test_cases = [ + ("https://api.openai.com/v1", True, "Main OpenAI endpoint"), + ("https://api.openai.com", True, "OpenAI without path"), + ("https://eu.api.openai.com/v1", True, "EU endpoint"), + ("https://apac.api.openai.com/v1", True, "APAC endpoint"), + ("https://API.OPENAI.COM/v1", True, "Uppercase URL"), + ] + + for api_base, expected, description in 
test_cases: + with self.subTest(api_base=api_base, desc=description): + result = self._should_use_responses_api(api_base) + self.assertEqual( + result, + expected, + f"API base '{api_base}' ({description}): expected {expected}, got {result}", + ) + + def test_non_openai_endpoints_use_chat_completions(self): + """Test that non-OpenAI endpoints use Chat Completions API""" + test_cases = [ + ("https://generativelanguage.googleapis.com/v1beta/openai/", False, "Google AI Studio"), + ("https://openrouter.ai/api/v1", False, "OpenRouter"), + ("http://localhost:8000/v1", False, "Local server"), + ("https://api.anthropic.com/v1", False, "Anthropic"), + ("https://api.deepseek.com/v1", False, "DeepSeek"), + (None, False, "None API base"), + ("", False, "Empty API base"), + ] + + for api_base, expected, description in test_cases: + with self.subTest(api_base=api_base, desc=description): + result = self._should_use_responses_api(api_base) + self.assertEqual( + result, + expected, + f"API base '{api_base}' ({description}): expected {expected}, got {result}", + ) + + def test_explicit_api_type_override(self): + """Test that api_type override works correctly""" + # Force responses API even for non-OpenAI endpoint + self.assertTrue( + self._should_use_responses_api("http://localhost:8000/v1", api_type="responses") + ) + + # Force chat completions even for OpenAI endpoint + self.assertFalse( + self._should_use_responses_api("https://api.openai.com/v1", api_type="chat_completions") + ) + + # Auto detection with OpenAI endpoint + self.assertTrue( + self._should_use_responses_api("https://api.openai.com/v1", api_type="auto") + ) + + # Auto detection with non-OpenAI endpoint + self.assertFalse( + self._should_use_responses_api("http://localhost:8000/v1", api_type="auto") + ) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_reasoning_effort_config.py b/tests/test_reasoning_effort_config.py index 584c7ddfd..b4bd79b8a 100644 --- a/tests/test_reasoning_effort_config.py +++ b/tests/test_reasoning_effort_config.py @@ -132,6 +132,7 @@ def test_openai_llm_uses_reasoning_effort(self): model_cfg.api_key = "test-key" model_cfg.random_seed = None model_cfg.reasoning_effort = "high" + model_cfg.api_type = "chat_completions" # Force Chat Completions API for this test # Mock OpenAI client to avoid actual API calls with unittest.mock.patch('openai.OpenAI'): @@ -140,8 +141,8 @@ def test_openai_llm_uses_reasoning_effort(self): # Verify the reasoning_effort is stored self.assertEqual(llm.reasoning_effort, "high") - def test_reasoning_effort_passed_to_api_params(self): - """Test that reasoning_effort is included in API call parameters""" + def test_reasoning_effort_passed_to_api_params_chat_completions(self): + """Test that reasoning_effort is included in API call parameters (Chat Completions)""" model_cfg = Mock() model_cfg.name = "gpt-oss-120b" model_cfg.system_message = "system" @@ -155,6 +156,7 @@ def test_reasoning_effort_passed_to_api_params(self): model_cfg.api_key = "test-key" model_cfg.random_seed = None model_cfg.reasoning_effort = "medium" + model_cfg.api_type = "chat_completions" # Force Chat Completions API for this test with unittest.mock.patch('openai.OpenAI'): llm = OpenAILLM(model_cfg) @@ -178,6 +180,51 @@ def test_reasoning_effort_passed_to_api_params(self): # Verify the API was called with reasoning_effort llm.client.chat.completions.create.assert_called_once_with(**test_params) + def test_reasoning_effort_passed_to_responses_api(self): + """Test that reasoning_effort is converted to nested 
format for Responses API""" + model_cfg = Mock() + model_cfg.name = "gpt-oss-120b" + model_cfg.system_message = "system" + model_cfg.temperature = 0.7 + model_cfg.top_p = 0.95 + model_cfg.max_tokens = 4096 + model_cfg.timeout = 60 + model_cfg.retries = 3 + model_cfg.retry_delay = 5 + model_cfg.api_base = "https://api.openai.com/v1" + model_cfg.api_key = "test-key" + model_cfg.random_seed = None + model_cfg.reasoning_effort = "medium" + model_cfg.api_type = "responses" # Force Responses API for this test + + with unittest.mock.patch('openai.OpenAI'): + llm = OpenAILLM(model_cfg) + + # Test the _call_api method directly with mocked client + mock_response = Mock() + mock_response.output_text = "Test response" + llm.client.responses.create.return_value = mock_response + + # Input params in Chat Completions format + test_params = { + "model": "gpt-oss-120b", + "messages": [{"role": "system", "content": "Test"}, {"role": "user", "content": "Test user"}], + "max_completion_tokens": 4096, + "reasoning_effort": "medium" + } + + result = asyncio.run(llm._call_api(test_params)) + + # Verify the Responses API was called with nested reasoning format + llm.client.responses.create.assert_called_once() + call_args = llm.client.responses.create.call_args + self.assertEqual(call_args.kwargs["model"], "gpt-oss-120b") + self.assertEqual(call_args.kwargs["instructions"], "Test") + self.assertEqual(call_args.kwargs["input"], [{"role": "user", "content": "Test user"}]) + self.assertEqual(call_args.kwargs["reasoning"], {"effort": "medium"}) + self.assertEqual(call_args.kwargs["max_output_tokens"], 4096) + self.assertFalse(call_args.kwargs["store"]) + def test_yaml_file_loading_with_reasoning_effort(self): """Test loading reasoning_effort from actual YAML file""" yaml_content = """ diff --git a/tests/test_responses_api.py b/tests/test_responses_api.py new file mode 100644 index 000000000..c1bf12843 --- /dev/null +++ b/tests/test_responses_api.py @@ -0,0 +1,363 @@ +""" +Tests for OpenAI Responses API migration + +This module tests the Responses API integration, including: +- Parameter conversion from Chat Completions format to Responses API format +- API selection logic based on endpoint and api_type config +- Response parsing differences between the two APIs +""" + +import unittest +import asyncio +from unittest.mock import Mock, patch + + +class TestResponsesAPIParameterConversion(unittest.TestCase): + """Test that Chat Completions parameters are correctly converted to Responses API format""" + + def setUp(self): + """Set up test fixtures""" + from openevolve.llm.openai import OpenAILLM + + self.model_cfg = Mock() + self.model_cfg.name = "gpt-4o" + self.model_cfg.system_message = "You are a helpful assistant" + self.model_cfg.temperature = 0.7 + self.model_cfg.top_p = 0.95 + self.model_cfg.max_tokens = 4096 + self.model_cfg.timeout = 60 + self.model_cfg.retries = 3 + self.model_cfg.retry_delay = 5 + self.model_cfg.api_base = "https://api.openai.com/v1" + self.model_cfg.api_key = "test-key" + self.model_cfg.random_seed = None + self.model_cfg.reasoning_effort = None + self.model_cfg.api_type = "responses" # Force Responses API + + def test_messages_to_input_conversion(self): + """Test that messages array is converted to input parameter""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test response" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": 
"gpt-4o", + "messages": [ + {"role": "system", "content": "Be helpful"}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ], + "temperature": 0.7, + "max_tokens": 100 + } + + asyncio.run(llm._call_api(chat_params)) + + call_args = llm.client.responses.create.call_args.kwargs + + # System message should become instructions + self.assertEqual(call_args["instructions"], "Be helpful") + + # Other messages should be in input array (excluding system) + expected_input = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ] + self.assertEqual(call_args["input"], expected_input) + + def test_max_tokens_conversion(self): + """Test that max_tokens is converted to max_output_tokens""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + # Test with max_tokens + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 500 + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["max_output_tokens"], 500) + + def test_max_completion_tokens_conversion(self): + """Test that max_completion_tokens takes precedence over max_tokens""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + # Test with max_completion_tokens (should take precedence) + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 500, + "max_completion_tokens": 1000 + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["max_output_tokens"], 1000) + + def test_reasoning_effort_nested_format(self): + """Test that reasoning_effort is converted to nested reasoning object""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": "o3-mini", + "messages": [{"role": "user", "content": "Think hard"}], + "reasoning_effort": "high" + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["reasoning"], {"effort": "high"}) + + def test_store_disabled(self): + """Test that store is set to False for OpenEvolve use case""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}] + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertFalse(call_args["store"]) + + def test_sampling_params_preserved(self): + """Test that temperature, top_p, and seed are preserved""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = 
OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}], + "temperature": 0.5, + "top_p": 0.9, + "seed": 42 + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["temperature"], 0.5) + self.assertEqual(call_args["top_p"], 0.9) + self.assertEqual(call_args["seed"], 42) + + +class TestAPISelectionInOpenAILLM(unittest.TestCase): + """Test the API selection logic in the OpenAILLM class""" + + def _create_model_cfg(self, api_base, api_type="auto"): + """Helper to create a mock model config""" + model_cfg = Mock() + model_cfg.name = "gpt-4o" + model_cfg.system_message = "test" + model_cfg.temperature = 0.7 + model_cfg.top_p = 0.95 + model_cfg.max_tokens = 4096 + model_cfg.timeout = 60 + model_cfg.retries = 3 + model_cfg.retry_delay = 5 + model_cfg.api_base = api_base + model_cfg.api_key = "test-key" + model_cfg.random_seed = None + model_cfg.reasoning_effort = None + model_cfg.api_type = api_type + return model_cfg + + def test_openai_endpoint_uses_responses_api(self): + """Test that OpenAI endpoints use Responses API by default""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + llm = OpenAILLM(self._create_model_cfg("https://api.openai.com/v1")) + self.assertTrue(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("https://eu.api.openai.com/v1")) + self.assertTrue(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("https://apac.api.openai.com/v1")) + self.assertTrue(llm.use_responses_api) + + def test_non_openai_endpoint_uses_chat_completions(self): + """Test that non-OpenAI endpoints use Chat Completions API""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + llm = OpenAILLM(self._create_model_cfg("https://openrouter.ai/api/v1")) + self.assertFalse(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("http://localhost:8000/v1")) + self.assertFalse(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("https://generativelanguage.googleapis.com/v1beta/openai/")) + self.assertFalse(llm.use_responses_api) + + def test_api_type_override_forces_responses(self): + """Test that api_type='responses' forces Responses API""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + # Non-OpenAI endpoint with responses override + llm = OpenAILLM(self._create_model_cfg("http://localhost:8000/v1", api_type="responses")) + self.assertTrue(llm.use_responses_api) + + def test_api_type_override_forces_chat_completions(self): + """Test that api_type='chat_completions' forces Chat Completions API""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + # OpenAI endpoint with chat_completions override + llm = OpenAILLM(self._create_model_cfg("https://api.openai.com/v1", api_type="chat_completions")) + self.assertFalse(llm.use_responses_api) + + +class TestResponsesAPIResponseParsing(unittest.TestCase): + """Test that responses from both APIs are correctly parsed""" + + def _create_model_cfg(self, api_type): + """Helper to create a mock model config""" + model_cfg = Mock() + model_cfg.name = "gpt-4o" + model_cfg.system_message = "test" + model_cfg.temperature = 0.7 + model_cfg.top_p = 0.95 + model_cfg.max_tokens = 4096 + model_cfg.timeout = 60 + model_cfg.retries = 3 + 
model_cfg.retry_delay = 5 + model_cfg.api_base = "https://api.openai.com/v1" + model_cfg.api_key = "test-key" + model_cfg.random_seed = None + model_cfg.reasoning_effort = None + model_cfg.api_type = api_type + return model_cfg + + def test_responses_api_output_text(self): + """Test that Responses API response.output_text is returned""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self._create_model_cfg("responses")) + + mock_response = Mock() + mock_response.output_text = "This is from Responses API" + llm.client.responses.create.return_value = mock_response + + result = asyncio.run(llm._call_api({ + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}] + })) + + self.assertEqual(result, "This is from Responses API") + + def test_chat_completions_message_content(self): + """Test that Chat Completions response.choices[0].message.content is returned""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self._create_model_cfg("chat_completions")) + + mock_response = Mock() + mock_response.choices = [Mock()] + mock_response.choices[0].message.content = "This is from Chat Completions" + llm.client.chat.completions.create.return_value = mock_response + + result = asyncio.run(llm._call_api({ + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}] + })) + + self.assertEqual(result, "This is from Chat Completions") + + +class TestConfigWithAPIType(unittest.TestCase): + """Test that api_type config option works correctly""" + + def test_api_type_default_is_none_for_model(self): + """Test that api_type defaults to None in LLMModelConfig (inherits from parent)""" + from openevolve.config import LLMModelConfig + + config = LLMModelConfig() + self.assertIsNone(config.api_type) + + def test_api_type_default_is_auto_for_llm(self): + """Test that api_type defaults to 'auto' in LLMConfig""" + from openevolve.config import LLMConfig + + config = LLMConfig() + self.assertEqual(config.api_type, "auto") + + def test_api_type_in_shared_config(self): + """Test that api_type is propagated to models via shared config""" + from openevolve.config import Config + + yaml_config = { + "llm": { + "api_base": "https://api.openai.com/v1", + "api_key": "test-key", + "api_type": "chat_completions", # Force chat completions at LLM level + "models": [{"name": "gpt-4o", "weight": 1.0}] + } + } + + config = Config.from_dict(yaml_config) + + # Model should inherit api_type from LLM config + self.assertEqual(config.llm.models[0].api_type, "chat_completions") + + def test_api_type_model_override(self): + """Test that model-level api_type overrides LLM-level""" + from openevolve.config import Config + + yaml_config = { + "llm": { + "api_base": "https://api.openai.com/v1", + "api_key": "test-key", + "api_type": "chat_completions", + "models": [ + {"name": "gpt-4o", "weight": 1.0, "api_type": "responses"} # Override + ] + } + } + + config = Config.from_dict(yaml_config) + + # Model-level override should take precedence + self.assertEqual(config.llm.models[0].api_type, "responses") + + +if __name__ == "__main__": + unittest.main() From d0960c6f795888b51ca6c23030a94b77b83f6864 Mon Sep 17 00:00:00 2001 From: Vamshi Balanaga Date: Thu, 11 Dec 2025 21:03:03 -0800 Subject: [PATCH 2/2] Add new fields to default config --- configs/default_config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/default_config.yaml b/configs/default_config.yaml index 928465bf5..7fcc5c0ed 100644 --- 
a/configs/default_config.yaml
+++ b/configs/default_config.yaml
@@ -41,11 +41,13 @@ llm:
   api_key: null                           # API key (defaults to OPENAI_API_KEY env variable)
                                           # or use ${VAR} syntax to specify which environment variable to read from:
                                           # api_key: ${GEMINI_API_KEY}  # Reads API key from $GEMINI_API_KEY
+  api_type: "auto"                        # API type: "auto", "responses", or "chat_completions"
 
   # Generation parameters
   temperature: 0.7                        # Temperature for generation (higher = more creative)
   top_p: 0.95                             # Top-p sampling parameter
   max_tokens: 4096                        # Maximum tokens to generate
+  reasoning_effort: "medium"              # Reasoning effort: "low", "medium", "high", "xhigh"
 
   # Request parameters
   timeout: 60                             # Timeout for API requests in seconds
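
Below is a minimal example of how the new fields compose across the two config levels. It is an illustrative sketch only, not part of the patch: the file name, model names, and weights are placeholders borrowed from the tests above.

    # example user config (hypothetical)
    llm:
      api_base: "https://api.openai.com/v1"
      api_key: ${OPENAI_API_KEY}
      api_type: "auto"                      # auto-detection selects the Responses API for api.openai.com
      reasoning_effort: "medium"
      models:
        - name: "gpt-4o"
          weight: 1.0                       # inherits api_type "auto" from the llm block
        - name: "o3-mini"
          weight: 1.0
          api_type: "chat_completions"      # per-model override forces the Chat Completions API

With a configuration like this, requests for the first model would be dispatched to client.responses.create() and requests for the second to client.chat.completions.create(), following the dispatch in OpenAILLM._call_api() and _should_use_responses_api() above.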