From e70d52ab12c686b7d4f57baa036a202cdc36db80 Mon Sep 17 00:00:00 2001 From: Vamshi Balanaga Date: Thu, 11 Dec 2025 20:50:05 -0800 Subject: [PATCH 1/2] Add support for responses API; maintain backward compatability --- openevolve/config.py | 12 + openevolve/llm/openai.py | 117 ++++++++- pyproject.toml | 2 +- tests/test_openai_model_detection.py | 87 ++++++ tests/test_reasoning_effort_config.py | 51 +++- tests/test_responses_api.py | 363 ++++++++++++++++++++++++++ 6 files changed, 623 insertions(+), 9 deletions(-) create mode 100644 tests/test_responses_api.py diff --git a/openevolve/config.py b/openevolve/config.py index 543874496..0611992f5 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -78,6 +78,13 @@ class LLMModelConfig: # Reasoning parameters reasoning_effort: Optional[str] = None + # API type selection: "auto" (default), "responses", or "chat_completions" + # - "auto": Use Responses API for OpenAI endpoints, Chat Completions for others + # - "responses": Force use of OpenAI Responses API + # - "chat_completions": Force use of Chat Completions API + # None means inherit from parent config (defaults to "auto") + api_type: Optional[str] = None + def __post_init__(self): """Post-initialization to resolve ${VAR} env var references in api_key""" self.api_key = _resolve_env_var(self.api_key) @@ -116,6 +123,9 @@ class LLMConfig(LLMModelConfig): # Reasoning parameters (inherited from LLMModelConfig but can be overridden) reasoning_effort: Optional[str] = None + # API type for LLM level (defaults to "auto" for auto-detection) + api_type: str = "auto" + def __post_init__(self): """Post-initialization to set up model configurations""" super().__post_init__() # Resolve ${VAR} in api_key at LLMConfig level @@ -170,6 +180,7 @@ def __post_init__(self): "retry_delay": self.retry_delay, "random_seed": self.random_seed, "reasoning_effort": self.reasoning_effort, + "api_type": self.api_type, } self.update_model_params(shared_config) @@ -223,6 +234,7 @@ def rebuild_models(self) -> None: "retry_delay": self.retry_delay, "random_seed": self.random_seed, "reasoning_effort": self.reasoning_effort, + "api_type": self.api_type, } self.update_model_params(shared_config) diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py index 48cd81f96..e7f1b7059 100644 --- a/openevolve/llm/openai.py +++ b/openevolve/llm/openai.py @@ -34,6 +34,7 @@ def __init__( self.api_key = model_cfg.api_key self.random_seed = getattr(model_cfg, "random_seed", None) self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None) + self.api_type = getattr(model_cfg, "api_type", "auto") # Set up API client # OpenAI client requires max_retries to be int, not None @@ -45,6 +46,9 @@ def __init__( max_retries=max_retries, ) + # Determine which API to use (Responses API vs Chat Completions) + self.use_responses_api = self._should_use_responses_api() + # Only log unique models to reduce duplication if not hasattr(logger, "_initialized_models"): logger._initialized_models = set() @@ -53,6 +57,39 @@ def __init__( logger.info(f"Initialized OpenAI LLM with model: {self.model}") logger._initialized_models.add(self.model) + def _should_use_responses_api(self) -> bool: + """ + Determine if the Responses API should be used instead of Chat Completions. + + The Responses API is only available on OpenAI's official endpoints. + For other providers (OpenRouter, Google AI Studio, local servers, etc.), + we must use the Chat Completions API for compatibility. 
+ + Returns: + True if Responses API should be used, False for Chat Completions + """ + # Normalize api_type (None defaults to "auto") + api_type = self.api_type if self.api_type is not None else "auto" + + # Check for explicit override + if api_type == "responses": + return True + if api_type == "chat_completions": + return False + + # Auto-detect based on API base URL + if not self.api_base: + return False + + api_lower = self.api_base.lower() + + # Only use Responses API for official OpenAI endpoints + return ( + api_lower.startswith("https://api.openai.com") or + api_lower.startswith("https://eu.api.openai.com") or + api_lower.startswith("https://apac.api.openai.com") + ) + async def generate(self, prompt: str, **kwargs) -> str: """Generate text from a prompt""" return await self.generate_with_context( @@ -159,14 +196,82 @@ async def generate_with_context( raise async def _call_api(self, params: Dict[str, Any]) -> str: - """Make the actual API call""" + """Make the actual API call, dispatching to appropriate API""" # Use asyncio to run the blocking API call in a thread pool loop = asyncio.get_event_loop() - response = await loop.run_in_executor( - None, lambda: self.client.chat.completions.create(**params) - ) + + if self.use_responses_api: + response = await loop.run_in_executor( + None, lambda: self._call_responses_api(params) + ) + response_text = response.output_text + else: + response = await loop.run_in_executor( + None, lambda: self.client.chat.completions.create(**params) + ) + response_text = response.choices[0].message.content + # Logging of system prompt, user message and response content logger = logging.getLogger(__name__) logger.debug(f"API parameters: {params}") - logger.debug(f"API response: {response.choices[0].message.content}") - return response.choices[0].message.content + logger.debug(f"API response: {response_text}") + return response_text + + def _call_responses_api(self, chat_params: Dict[str, Any]) -> Any: + """ + Convert Chat Completions params to Responses API format and make the call. 
+ + The Responses API uses a different parameter structure: + - 'messages' -> 'input' (can be array of messages) + - System message in 'messages' -> 'instructions' parameter + - 'max_tokens'/'max_completion_tokens' -> 'max_output_tokens' + - 'reasoning_effort' -> 'reasoning: {"effort": ...}' + + Args: + chat_params: Parameters in Chat Completions format + + Returns: + Response object from client.responses.create() + """ + messages = chat_params["messages"] + + # Extract system message as instructions, keep other messages as input + instructions = None + input_messages = [] + for msg in messages: + if msg["role"] == "system": + instructions = msg["content"] + else: + input_messages.append(msg) + + # Build Responses API params + resp_params = { + "model": chat_params["model"], + "input": input_messages, + } + + if instructions: + resp_params["instructions"] = instructions + + # Map token limits (Responses API uses max_output_tokens) + if "max_completion_tokens" in chat_params: + resp_params["max_output_tokens"] = chat_params["max_completion_tokens"] + elif "max_tokens" in chat_params: + resp_params["max_output_tokens"] = chat_params["max_tokens"] + + # Map sampling parameters + if "temperature" in chat_params: + resp_params["temperature"] = chat_params["temperature"] + if "top_p" in chat_params: + resp_params["top_p"] = chat_params["top_p"] + if "seed" in chat_params: + resp_params["seed"] = chat_params["seed"] + + # Map reasoning_effort to nested format for Responses API + if "reasoning_effort" in chat_params: + resp_params["reasoning"] = {"effort": chat_params["reasoning_effort"]} + + # Disable conversation storage (not needed for OpenEvolve's use case) + resp_params["store"] = False + + return self.client.responses.create(**resp_params) diff --git a/pyproject.toml b/pyproject.toml index 8bf564feb..a43c57621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ authors = [ {name = "codelion"} ] dependencies = [ - "openai>=1.0.0", + "openai>=1.80.0", # Required for Responses API "pyyaml>=6.0", "numpy>=1.22.0", "tqdm>=4.64.0", diff --git a/tests/test_openai_model_detection.py b/tests/test_openai_model_detection.py index c8665abd0..0247b2c4d 100644 --- a/tests/test_openai_model_detection.py +++ b/tests/test_openai_model_detection.py @@ -94,5 +94,92 @@ def is_reasoning_model(model_name, api_base): ) +class TestResponsesAPIDetection(unittest.TestCase): + """Test Responses API vs Chat Completions API selection logic""" + + def _should_use_responses_api(self, api_base, api_type="auto"): + """Test function that mimics the logic in openai.py""" + # Check for explicit override + if api_type == "responses": + return True + if api_type == "chat_completions": + return False + + # Auto-detect based on API base URL + if not api_base: + return False + + api_lower = api_base.lower() + + # Only use Responses API for official OpenAI endpoints + return ( + api_lower.startswith("https://api.openai.com") or + api_lower.startswith("https://eu.api.openai.com") or + api_lower.startswith("https://apac.api.openai.com") + ) + + def test_openai_endpoints_use_responses_api(self): + """Test that official OpenAI endpoints use Responses API by default""" + test_cases = [ + ("https://api.openai.com/v1", True, "Main OpenAI endpoint"), + ("https://api.openai.com", True, "OpenAI without path"), + ("https://eu.api.openai.com/v1", True, "EU endpoint"), + ("https://apac.api.openai.com/v1", True, "APAC endpoint"), + ("https://API.OPENAI.COM/v1", True, "Uppercase URL"), + ] + + for api_base, expected, description in 
test_cases: + with self.subTest(api_base=api_base, desc=description): + result = self._should_use_responses_api(api_base) + self.assertEqual( + result, + expected, + f"API base '{api_base}' ({description}): expected {expected}, got {result}", + ) + + def test_non_openai_endpoints_use_chat_completions(self): + """Test that non-OpenAI endpoints use Chat Completions API""" + test_cases = [ + ("https://generativelanguage.googleapis.com/v1beta/openai/", False, "Google AI Studio"), + ("https://openrouter.ai/api/v1", False, "OpenRouter"), + ("http://localhost:8000/v1", False, "Local server"), + ("https://api.anthropic.com/v1", False, "Anthropic"), + ("https://api.deepseek.com/v1", False, "DeepSeek"), + (None, False, "None API base"), + ("", False, "Empty API base"), + ] + + for api_base, expected, description in test_cases: + with self.subTest(api_base=api_base, desc=description): + result = self._should_use_responses_api(api_base) + self.assertEqual( + result, + expected, + f"API base '{api_base}' ({description}): expected {expected}, got {result}", + ) + + def test_explicit_api_type_override(self): + """Test that api_type override works correctly""" + # Force responses API even for non-OpenAI endpoint + self.assertTrue( + self._should_use_responses_api("http://localhost:8000/v1", api_type="responses") + ) + + # Force chat completions even for OpenAI endpoint + self.assertFalse( + self._should_use_responses_api("https://api.openai.com/v1", api_type="chat_completions") + ) + + # Auto detection with OpenAI endpoint + self.assertTrue( + self._should_use_responses_api("https://api.openai.com/v1", api_type="auto") + ) + + # Auto detection with non-OpenAI endpoint + self.assertFalse( + self._should_use_responses_api("http://localhost:8000/v1", api_type="auto") + ) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_reasoning_effort_config.py b/tests/test_reasoning_effort_config.py index 584c7ddfd..b4bd79b8a 100644 --- a/tests/test_reasoning_effort_config.py +++ b/tests/test_reasoning_effort_config.py @@ -132,6 +132,7 @@ def test_openai_llm_uses_reasoning_effort(self): model_cfg.api_key = "test-key" model_cfg.random_seed = None model_cfg.reasoning_effort = "high" + model_cfg.api_type = "chat_completions" # Force Chat Completions API for this test # Mock OpenAI client to avoid actual API calls with unittest.mock.patch('openai.OpenAI'): @@ -140,8 +141,8 @@ def test_openai_llm_uses_reasoning_effort(self): # Verify the reasoning_effort is stored self.assertEqual(llm.reasoning_effort, "high") - def test_reasoning_effort_passed_to_api_params(self): - """Test that reasoning_effort is included in API call parameters""" + def test_reasoning_effort_passed_to_api_params_chat_completions(self): + """Test that reasoning_effort is included in API call parameters (Chat Completions)""" model_cfg = Mock() model_cfg.name = "gpt-oss-120b" model_cfg.system_message = "system" @@ -155,6 +156,7 @@ def test_reasoning_effort_passed_to_api_params(self): model_cfg.api_key = "test-key" model_cfg.random_seed = None model_cfg.reasoning_effort = "medium" + model_cfg.api_type = "chat_completions" # Force Chat Completions API for this test with unittest.mock.patch('openai.OpenAI'): llm = OpenAILLM(model_cfg) @@ -178,6 +180,51 @@ def test_reasoning_effort_passed_to_api_params(self): # Verify the API was called with reasoning_effort llm.client.chat.completions.create.assert_called_once_with(**test_params) + def test_reasoning_effort_passed_to_responses_api(self): + """Test that reasoning_effort is converted to nested 
format for Responses API""" + model_cfg = Mock() + model_cfg.name = "gpt-oss-120b" + model_cfg.system_message = "system" + model_cfg.temperature = 0.7 + model_cfg.top_p = 0.95 + model_cfg.max_tokens = 4096 + model_cfg.timeout = 60 + model_cfg.retries = 3 + model_cfg.retry_delay = 5 + model_cfg.api_base = "https://api.openai.com/v1" + model_cfg.api_key = "test-key" + model_cfg.random_seed = None + model_cfg.reasoning_effort = "medium" + model_cfg.api_type = "responses" # Force Responses API for this test + + with unittest.mock.patch('openai.OpenAI'): + llm = OpenAILLM(model_cfg) + + # Test the _call_api method directly with mocked client + mock_response = Mock() + mock_response.output_text = "Test response" + llm.client.responses.create.return_value = mock_response + + # Input params in Chat Completions format + test_params = { + "model": "gpt-oss-120b", + "messages": [{"role": "system", "content": "Test"}, {"role": "user", "content": "Test user"}], + "max_completion_tokens": 4096, + "reasoning_effort": "medium" + } + + result = asyncio.run(llm._call_api(test_params)) + + # Verify the Responses API was called with nested reasoning format + llm.client.responses.create.assert_called_once() + call_args = llm.client.responses.create.call_args + self.assertEqual(call_args.kwargs["model"], "gpt-oss-120b") + self.assertEqual(call_args.kwargs["instructions"], "Test") + self.assertEqual(call_args.kwargs["input"], [{"role": "user", "content": "Test user"}]) + self.assertEqual(call_args.kwargs["reasoning"], {"effort": "medium"}) + self.assertEqual(call_args.kwargs["max_output_tokens"], 4096) + self.assertFalse(call_args.kwargs["store"]) + def test_yaml_file_loading_with_reasoning_effort(self): """Test loading reasoning_effort from actual YAML file""" yaml_content = """ diff --git a/tests/test_responses_api.py b/tests/test_responses_api.py new file mode 100644 index 000000000..c1bf12843 --- /dev/null +++ b/tests/test_responses_api.py @@ -0,0 +1,363 @@ +""" +Tests for OpenAI Responses API migration + +This module tests the Responses API integration, including: +- Parameter conversion from Chat Completions format to Responses API format +- API selection logic based on endpoint and api_type config +- Response parsing differences between the two APIs +""" + +import unittest +import asyncio +from unittest.mock import Mock, patch + + +class TestResponsesAPIParameterConversion(unittest.TestCase): + """Test that Chat Completions parameters are correctly converted to Responses API format""" + + def setUp(self): + """Set up test fixtures""" + from openevolve.llm.openai import OpenAILLM + + self.model_cfg = Mock() + self.model_cfg.name = "gpt-4o" + self.model_cfg.system_message = "You are a helpful assistant" + self.model_cfg.temperature = 0.7 + self.model_cfg.top_p = 0.95 + self.model_cfg.max_tokens = 4096 + self.model_cfg.timeout = 60 + self.model_cfg.retries = 3 + self.model_cfg.retry_delay = 5 + self.model_cfg.api_base = "https://api.openai.com/v1" + self.model_cfg.api_key = "test-key" + self.model_cfg.random_seed = None + self.model_cfg.reasoning_effort = None + self.model_cfg.api_type = "responses" # Force Responses API + + def test_messages_to_input_conversion(self): + """Test that messages array is converted to input parameter""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test response" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": 
"gpt-4o", + "messages": [ + {"role": "system", "content": "Be helpful"}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ], + "temperature": 0.7, + "max_tokens": 100 + } + + asyncio.run(llm._call_api(chat_params)) + + call_args = llm.client.responses.create.call_args.kwargs + + # System message should become instructions + self.assertEqual(call_args["instructions"], "Be helpful") + + # Other messages should be in input array (excluding system) + expected_input = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ] + self.assertEqual(call_args["input"], expected_input) + + def test_max_tokens_conversion(self): + """Test that max_tokens is converted to max_output_tokens""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + # Test with max_tokens + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 500 + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["max_output_tokens"], 500) + + def test_max_completion_tokens_conversion(self): + """Test that max_completion_tokens takes precedence over max_tokens""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + # Test with max_completion_tokens (should take precedence) + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 500, + "max_completion_tokens": 1000 + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["max_output_tokens"], 1000) + + def test_reasoning_effort_nested_format(self): + """Test that reasoning_effort is converted to nested reasoning object""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": "o3-mini", + "messages": [{"role": "user", "content": "Think hard"}], + "reasoning_effort": "high" + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["reasoning"], {"effort": "high"}) + + def test_store_disabled(self): + """Test that store is set to False for OpenEvolve use case""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}] + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertFalse(call_args["store"]) + + def test_sampling_params_preserved(self): + """Test that temperature, top_p, and seed are preserved""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = 
OpenAILLM(self.model_cfg) + + mock_response = Mock() + mock_response.output_text = "Test" + llm.client.responses.create.return_value = mock_response + + chat_params = { + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}], + "temperature": 0.5, + "top_p": 0.9, + "seed": 42 + } + + asyncio.run(llm._call_api(chat_params)) + call_args = llm.client.responses.create.call_args.kwargs + self.assertEqual(call_args["temperature"], 0.5) + self.assertEqual(call_args["top_p"], 0.9) + self.assertEqual(call_args["seed"], 42) + + +class TestAPISelectionInOpenAILLM(unittest.TestCase): + """Test the API selection logic in the OpenAILLM class""" + + def _create_model_cfg(self, api_base, api_type="auto"): + """Helper to create a mock model config""" + model_cfg = Mock() + model_cfg.name = "gpt-4o" + model_cfg.system_message = "test" + model_cfg.temperature = 0.7 + model_cfg.top_p = 0.95 + model_cfg.max_tokens = 4096 + model_cfg.timeout = 60 + model_cfg.retries = 3 + model_cfg.retry_delay = 5 + model_cfg.api_base = api_base + model_cfg.api_key = "test-key" + model_cfg.random_seed = None + model_cfg.reasoning_effort = None + model_cfg.api_type = api_type + return model_cfg + + def test_openai_endpoint_uses_responses_api(self): + """Test that OpenAI endpoints use Responses API by default""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + llm = OpenAILLM(self._create_model_cfg("https://api.openai.com/v1")) + self.assertTrue(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("https://eu.api.openai.com/v1")) + self.assertTrue(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("https://apac.api.openai.com/v1")) + self.assertTrue(llm.use_responses_api) + + def test_non_openai_endpoint_uses_chat_completions(self): + """Test that non-OpenAI endpoints use Chat Completions API""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + llm = OpenAILLM(self._create_model_cfg("https://openrouter.ai/api/v1")) + self.assertFalse(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("http://localhost:8000/v1")) + self.assertFalse(llm.use_responses_api) + + llm = OpenAILLM(self._create_model_cfg("https://generativelanguage.googleapis.com/v1beta/openai/")) + self.assertFalse(llm.use_responses_api) + + def test_api_type_override_forces_responses(self): + """Test that api_type='responses' forces Responses API""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + # Non-OpenAI endpoint with responses override + llm = OpenAILLM(self._create_model_cfg("http://localhost:8000/v1", api_type="responses")) + self.assertTrue(llm.use_responses_api) + + def test_api_type_override_forces_chat_completions(self): + """Test that api_type='chat_completions' forces Chat Completions API""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + + # OpenAI endpoint with chat_completions override + llm = OpenAILLM(self._create_model_cfg("https://api.openai.com/v1", api_type="chat_completions")) + self.assertFalse(llm.use_responses_api) + + +class TestResponsesAPIResponseParsing(unittest.TestCase): + """Test that responses from both APIs are correctly parsed""" + + def _create_model_cfg(self, api_type): + """Helper to create a mock model config""" + model_cfg = Mock() + model_cfg.name = "gpt-4o" + model_cfg.system_message = "test" + model_cfg.temperature = 0.7 + model_cfg.top_p = 0.95 + model_cfg.max_tokens = 4096 + model_cfg.timeout = 60 + model_cfg.retries = 3 + 
model_cfg.retry_delay = 5 + model_cfg.api_base = "https://api.openai.com/v1" + model_cfg.api_key = "test-key" + model_cfg.random_seed = None + model_cfg.reasoning_effort = None + model_cfg.api_type = api_type + return model_cfg + + def test_responses_api_output_text(self): + """Test that Responses API response.output_text is returned""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self._create_model_cfg("responses")) + + mock_response = Mock() + mock_response.output_text = "This is from Responses API" + llm.client.responses.create.return_value = mock_response + + result = asyncio.run(llm._call_api({ + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}] + })) + + self.assertEqual(result, "This is from Responses API") + + def test_chat_completions_message_content(self): + """Test that Chat Completions response.choices[0].message.content is returned""" + with patch('openai.OpenAI'): + from openevolve.llm.openai import OpenAILLM + llm = OpenAILLM(self._create_model_cfg("chat_completions")) + + mock_response = Mock() + mock_response.choices = [Mock()] + mock_response.choices[0].message.content = "This is from Chat Completions" + llm.client.chat.completions.create.return_value = mock_response + + result = asyncio.run(llm._call_api({ + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Hi"}] + })) + + self.assertEqual(result, "This is from Chat Completions") + + +class TestConfigWithAPIType(unittest.TestCase): + """Test that api_type config option works correctly""" + + def test_api_type_default_is_none_for_model(self): + """Test that api_type defaults to None in LLMModelConfig (inherits from parent)""" + from openevolve.config import LLMModelConfig + + config = LLMModelConfig() + self.assertIsNone(config.api_type) + + def test_api_type_default_is_auto_for_llm(self): + """Test that api_type defaults to 'auto' in LLMConfig""" + from openevolve.config import LLMConfig + + config = LLMConfig() + self.assertEqual(config.api_type, "auto") + + def test_api_type_in_shared_config(self): + """Test that api_type is propagated to models via shared config""" + from openevolve.config import Config + + yaml_config = { + "llm": { + "api_base": "https://api.openai.com/v1", + "api_key": "test-key", + "api_type": "chat_completions", # Force chat completions at LLM level + "models": [{"name": "gpt-4o", "weight": 1.0}] + } + } + + config = Config.from_dict(yaml_config) + + # Model should inherit api_type from LLM config + self.assertEqual(config.llm.models[0].api_type, "chat_completions") + + def test_api_type_model_override(self): + """Test that model-level api_type overrides LLM-level""" + from openevolve.config import Config + + yaml_config = { + "llm": { + "api_base": "https://api.openai.com/v1", + "api_key": "test-key", + "api_type": "chat_completions", + "models": [ + {"name": "gpt-4o", "weight": 1.0, "api_type": "responses"} # Override + ] + } + } + + config = Config.from_dict(yaml_config) + + # Model-level override should take precedence + self.assertEqual(config.llm.models[0].api_type, "responses") + + +if __name__ == "__main__": + unittest.main() From d0960c6f795888b51ca6c23030a94b77b83f6864 Mon Sep 17 00:00:00 2001 From: Vamshi Balanaga Date: Thu, 11 Dec 2025 21:03:03 -0800 Subject: [PATCH 2/2] Add new fields to default config --- configs/default_config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configs/default_config.yaml b/configs/default_config.yaml index 928465bf5..7fcc5c0ed 100644 --- 
a/configs/default_config.yaml
+++ b/configs/default_config.yaml
@@ -41,11 +41,13 @@ llm:
   api_key: null                           # API key (defaults to OPENAI_API_KEY env variable)
                                           # or use ${VAR} syntax to specify which environment variable to read from:
                                           # api_key: ${GEMINI_API_KEY}  # Reads API key from $GEMINI_API_KEY
+  api_type: "auto"                        # API type: "auto", "responses", or "chat_completions"
 
   # Generation parameters
   temperature: 0.7                        # Temperature for generation (higher = more creative)
   top_p: 0.95                             # Top-p sampling parameter
   max_tokens: 4096                        # Maximum tokens to generate
+  reasoning_effort: "medium"              # Reasoning effort: "low", "medium", "high", "xhigh"
 
   # Request parameters
   timeout: 60                             # Timeout for API requests in seconds
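
Below is a minimal example of how the new fields compose across the two config levels. It is an illustrative sketch only, not part of the patch: the file name, model names, and weights are placeholders borrowed from the tests above.

    # example user config (hypothetical)
    llm:
      api_base: "https://api.openai.com/v1"
      api_key: ${OPENAI_API_KEY}
      api_type: "auto"                      # auto-detection selects the Responses API for api.openai.com
      reasoning_effort: "medium"
      models:
        - name: "gpt-4o"
          weight: 1.0                       # inherits api_type "auto" from the llm block
        - name: "o3-mini"
          weight: 1.0
          api_type: "chat_completions"      # per-model override forces the Chat Completions API

With a configuration like this, requests for the first model would be dispatched to client.responses.create() and requests for the second to client.chat.completions.create(), following the dispatch in OpenAILLM._call_api() and _should_use_responses_api() above.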