From 177aaf583833f9ed04bca2373367c79c95b678a6 Mon Sep 17 00:00:00 2001 From: Kunal Karmakar Date: Tue, 31 Mar 2026 16:28:01 +0000 Subject: [PATCH 1/2] Support azure openai responses --- .../crewai/llms/providers/azure/completion.py | 183 +++++++- .../tests/llms/azure/test_azure_responses.py | 407 ++++++++++++++++++ 2 files changed, 574 insertions(+), 16 deletions(-) create mode 100644 lib/crewai/tests/llms/azure/test_azure_responses.py diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index cac811bc7e1..dc278032798 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -89,8 +89,22 @@ class AzureCompletion(BaseLLM): is_openai_model: bool = False is_azure_openai_endpoint: bool = False + # Responses API settings + api: Literal["completions", "responses"] = "completions" + reasoning_effort: str | None = None + instructions: str | None = None + store: bool | None = None + previous_response_id: str | None = None + include: list[str] | None = None + builtin_tools: list[str] | None = None + parse_tool_outputs: bool = False + auto_chain: bool = False + auto_chain_reasoning: bool = False + max_completion_tokens: int | None = None + _client: Any = PrivateAttr(default=None) _async_client: Any = PrivateAttr(default=None) + _responses_delegate: Any = PrivateAttr(default=None) @model_validator(mode="before") @classmethod @@ -143,17 +157,95 @@ def _normalize_azure_fields(cls, data: Any) -> Any: def _init_clients(self) -> AzureCompletion: if not self.api_key: raise ValueError("Azure API key is required.") - client_kwargs: dict[str, Any] = { - "endpoint": self.endpoint, - "credential": AzureKeyCredential(self.api_key), - } - if self.api_version: - client_kwargs["api_version"] = self.api_version - self._client = ChatCompletionsClient(**client_kwargs) - self._async_client = AsyncChatCompletionsClient(**client_kwargs) + if self.api == "responses": + self._init_responses_delegate() + else: + client_kwargs: dict[str, Any] = { + "endpoint": self.endpoint, + "credential": AzureKeyCredential(self.api_key), + } + if self.api_version: + client_kwargs["api_version"] = self.api_version + + self._client = ChatCompletionsClient(**client_kwargs) + self._async_client = AsyncChatCompletionsClient(**client_kwargs) return self + def _init_responses_delegate(self) -> None: + """Create an OpenAICompletion delegate for the Azure OpenAI Responses API. + + The Azure OpenAI Responses API uses the standard OpenAI Python SDK + with a base_url pointing to the Azure resource's /openai/v1/ endpoint. + """ + from crewai.llms.providers.openai.completion import OpenAICompletion + + base_url = self._get_responses_base_url() + + delegate_kwargs: dict[str, Any] = { + "model": self.model, + "api_key": self.api_key, + "base_url": base_url, + "api": "responses", + "provider": "openai", + "stream": self.stream, + } + + if self.temperature is not None: + delegate_kwargs["temperature"] = self.temperature + if self.top_p is not None: + delegate_kwargs["top_p"] = self.top_p + if self.max_tokens is not None: + delegate_kwargs["max_tokens"] = self.max_tokens + if self.max_completion_tokens is not None: + delegate_kwargs["max_completion_tokens"] = self.max_completion_tokens + if self.stop: + delegate_kwargs["stop"] = self.stop + if self.timeout is not None: + delegate_kwargs["timeout"] = self.timeout + if self.max_retries != 2: + delegate_kwargs["max_retries"] = self.max_retries + if self.reasoning_effort is not None: + delegate_kwargs["reasoning_effort"] = self.reasoning_effort + if self.instructions is not None: + delegate_kwargs["instructions"] = self.instructions + if self.store is not None: + delegate_kwargs["store"] = self.store + if self.previous_response_id is not None: + delegate_kwargs["previous_response_id"] = self.previous_response_id + if self.include is not None: + delegate_kwargs["include"] = self.include + if self.builtin_tools is not None: + delegate_kwargs["builtin_tools"] = self.builtin_tools + if self.parse_tool_outputs: + delegate_kwargs["parse_tool_outputs"] = self.parse_tool_outputs + if self.auto_chain: + delegate_kwargs["auto_chain"] = self.auto_chain + if self.auto_chain_reasoning: + delegate_kwargs["auto_chain_reasoning"] = self.auto_chain_reasoning + if self.response_format is not None: + delegate_kwargs["response_format"] = self.response_format + if self.additional_params: + delegate_kwargs["additional_params"] = self.additional_params + + self._responses_delegate = OpenAICompletion(**delegate_kwargs) + + def _get_responses_base_url(self) -> str: + """Construct the base URL for the Azure OpenAI Responses API. + + Extracts the scheme and host from the configured endpoint and appends + the ``/openai/v1/`` path required by the Azure OpenAI Responses API. + + Returns: + The Responses API base URL, e.g. + ``https://myresource.openai.azure.com/openai/v1/`` + """ + if not self.endpoint: + raise ValueError("Azure endpoint is required for Responses API") + parsed = urlparse(self.endpoint) + base = f"{parsed.scheme}://{parsed.netloc}" + return f"{base}/openai/v1/" + def to_config_dict(self) -> dict[str, Any]: """Extend base config with Azure-specific fields.""" config = super().to_config_dict() @@ -173,6 +265,16 @@ def to_config_dict(self) -> dict[str, Any]: config["presence_penalty"] = self.presence_penalty if self.max_tokens is not None: config["max_tokens"] = self.max_tokens + if self.api != "completions": + config["api"] = self.api + if self.reasoning_effort is not None: + config["reasoning_effort"] = self.reasoning_effort + if self.instructions is not None: + config["instructions"] = self.instructions + if self.store is not None: + config["store"] = self.store + if self.max_completion_tokens is not None: + config["max_completion_tokens"] = self.max_completion_tokens return config @staticmethod @@ -278,10 +380,10 @@ def call( from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API. + """Call Azure AI Inference API (Chat Completions or Responses). Args: - messages: Input messages for the chat completion + messages: Input messages for the completion tools: List of tool/function definitions callbacks: Callback functions (not used in native implementation) available_functions: Available functions for tool calling @@ -290,8 +392,19 @@ def call( response_model: Response model Returns: - Chat completion response or tool call result + Completion response or tool call result """ + if self.api == "responses": + return self._responses_delegate.call( + messages=messages, + tools=tools, + callbacks=callbacks, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + with llm_call_context(): try: # Emit call started event @@ -350,10 +463,10 @@ async def acall( # type: ignore[return] from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API asynchronously. + """Call Azure AI Inference API asynchronously (Chat Completions or Responses). Args: - messages: Input messages for the chat completion + messages: Input messages for the completion tools: List of tool/function definitions callbacks: Callback functions (not used in native implementation) available_functions: Available functions for tool calling @@ -362,8 +475,19 @@ async def acall( # type: ignore[return] response_model: Pydantic model for structured output Returns: - Chat completion response or tool call result + Completion response or tool call result """ + if self.api == "responses": + return await self._responses_delegate.acall( + messages=messages, + tools=tools, + callbacks=callbacks, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + with llm_call_context(): try: self._emit_call_started_event( @@ -1013,7 +1137,8 @@ async def _ahandle_streaming_completion( def supports_function_calling(self) -> bool: """Check if the model supports function calling.""" - # Azure OpenAI models support function calling + if self.api == "responses": + return True return self.is_openai_model def supports_stop_words(self) -> bool: @@ -1023,6 +1148,8 @@ def supports_stop_words(self) -> bool: computer-use-preview) do not support stop sequences. See: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure """ + if self.api == "responses": + return False model_lower = self.model.lower() if self.model else "" if "gpt-5" in model_lower: @@ -1099,13 +1226,37 @@ def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]: return result return {"total_tokens": 0} + @property + def last_response_id(self) -> str | None: + """Get the last response ID from Responses API auto-chaining.""" + if self._responses_delegate is not None: + return self._responses_delegate.last_response_id + return None + + @property + def last_reasoning_items(self) -> list[Any] | None: + """Get the last reasoning items from Responses API auto-chain reasoning.""" + if self._responses_delegate is not None: + return self._responses_delegate.last_reasoning_items + return None + + def reset_chain(self) -> None: + """Reset the Responses API auto-chain state.""" + if self._responses_delegate is not None: + self._responses_delegate.reset_chain() + + def reset_reasoning_chain(self) -> None: + """Reset the Responses API reasoning chain state.""" + if self._responses_delegate is not None: + self._responses_delegate.reset_reasoning_chain() + async def aclose(self) -> None: """Close the async client and clean up resources. This ensures proper cleanup of the underlying aiohttp session to avoid unclosed connector warnings. """ - if hasattr(self._async_client, "close"): + if self._async_client and hasattr(self._async_client, "close"): await self._async_client.close() async def __aenter__(self) -> Self: diff --git a/lib/crewai/tests/llms/azure/test_azure_responses.py b/lib/crewai/tests/llms/azure/test_azure_responses.py new file mode 100644 index 00000000000..207dd5de68d --- /dev/null +++ b/lib/crewai/tests/llms/azure/test_azure_responses.py @@ -0,0 +1,407 @@ +"""Tests for Azure OpenAI Responses API support. + +Verifies that AzureCompletion with api='responses' correctly delegates +to OpenAICompletion configured with the Azure OpenAI /openai/v1/ base URL. +""" + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def azure_env(): + """Set Azure environment variables for tests.""" + with patch.dict(os.environ, { + "AZURE_API_KEY": "test-azure-key", + "AZURE_ENDPOINT": "https://myresource.openai.azure.com", + }): + yield + + +@pytest.fixture +def mock_openai_completion(): + """Mock OpenAICompletion to avoid real client creation. + + Patches at the source module so that the dynamic import inside + _init_responses_delegate picks up the mock. + """ + instance = MagicMock() + instance.call = MagicMock(return_value="responses-result") + instance.acall = AsyncMock(return_value="async-responses-result") + instance.last_response_id = "resp_abc123" + instance.last_reasoning_items = [{"type": "reasoning"}] + instance.reset_chain = MagicMock() + instance.reset_reasoning_chain = MagicMock() + MockCls = MagicMock(return_value=instance) + + with patch( + "crewai.llms.providers.openai.completion.OpenAICompletion", + MockCls, + ): + yield MockCls, instance + + +# --------------------------------------------------------------------------- +# Helper to build AzureCompletion with api="responses" while mocking imports +# --------------------------------------------------------------------------- + +def _create_azure_responses(**overrides): + """Create an AzureCompletion(api='responses'). + + Must be called inside a context where OpenAICompletion is already mocked + (i.e. via the ``mock_openai_completion`` fixture). + """ + from crewai.llms.providers.azure.completion import AzureCompletion + + defaults = { + "model": "gpt-4o", + "api_key": "test-azure-key", + "endpoint": "https://myresource.openai.azure.com", + "api": "responses", + } + defaults.update(overrides) + return AzureCompletion(**defaults) + + +# --------------------------------------------------------------------------- +# Initialization tests +# --------------------------------------------------------------------------- + +class TestAzureResponsesInit: + """Test initialization with api='responses'.""" + + def test_default_api_is_completions(self): + """Default api should be 'completions' (existing behaviour).""" + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.api == "completions" + assert comp._responses_delegate is None + + def test_responses_api_creates_delegate(self, mock_openai_completion): + MockCls, instance = mock_openai_completion + comp = _create_azure_responses() + + assert comp.api == "responses" + assert comp._responses_delegate is instance + MockCls.assert_called_once() + + def test_completions_clients_not_created_in_responses_mode( + self, mock_openai_completion + ): + """When api='responses', azure-ai-inference clients should not be created.""" + MockCls, _ = mock_openai_completion + comp = _create_azure_responses() + + assert comp._client is None + assert comp._async_client is None + + def test_responses_base_url_from_base_endpoint(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses( + endpoint="https://myresource.openai.azure.com", + ) + call_kwargs = MockCls.call_args[1] + assert call_kwargs["base_url"] == "https://myresource.openai.azure.com/openai/v1/" + + def test_responses_base_url_strips_deployment_path(self, mock_openai_completion): + """Endpoint with /openai/deployments/... should still produce correct base_url.""" + MockCls, _ = mock_openai_completion + comp = _create_azure_responses( + endpoint="https://myresource.openai.azure.com/openai/deployments/gpt-4o", + ) + call_kwargs = MockCls.call_args[1] + assert call_kwargs["base_url"] == "https://myresource.openai.azure.com/openai/v1/" + + def test_responses_base_url_preserves_port(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses( + endpoint="https://myresource.openai.azure.com:8443/openai/deployments/gpt-4o", + ) + call_kwargs = MockCls.call_args[1] + assert call_kwargs["base_url"] == "https://myresource.openai.azure.com:8443/openai/v1/" + + def test_delegate_receives_model_and_api_key(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses( + model="gpt-4o", + api_key="my-key", + ) + call_kwargs = MockCls.call_args[1] + assert call_kwargs["model"] == "gpt-4o" + assert call_kwargs["api_key"] == "my-key" + assert call_kwargs["api"] == "responses" + assert call_kwargs["provider"] == "openai" + + def test_delegate_receives_optional_params(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses( + temperature=0.5, + top_p=0.9, + max_tokens=1000, + max_completion_tokens=800, + reasoning_effort="medium", + instructions="Be helpful", + store=True, + previous_response_id="resp_prev", + include=["reasoning.encrypted_content"], + builtin_tools=["web_search"], + parse_tool_outputs=True, + auto_chain=True, + auto_chain_reasoning=True, + stream=True, + ) + call_kwargs = MockCls.call_args[1] + assert call_kwargs["temperature"] == 0.5 + assert call_kwargs["top_p"] == 0.9 + assert call_kwargs["max_tokens"] == 1000 + assert call_kwargs["max_completion_tokens"] == 800 + assert call_kwargs["reasoning_effort"] == "medium" + assert call_kwargs["instructions"] == "Be helpful" + assert call_kwargs["store"] is True + assert call_kwargs["previous_response_id"] == "resp_prev" + assert call_kwargs["include"] == ["reasoning.encrypted_content"] + assert call_kwargs["builtin_tools"] == ["web_search"] + assert call_kwargs["parse_tool_outputs"] is True + assert call_kwargs["auto_chain"] is True + assert call_kwargs["auto_chain_reasoning"] is True + assert call_kwargs["stream"] is True + + def test_delegate_omits_unset_optional_params(self, mock_openai_completion): + """Params left at defaults should not be passed to the delegate.""" + MockCls, _ = mock_openai_completion + comp = _create_azure_responses() + call_kwargs = MockCls.call_args[1] + # These should NOT be in kwargs because they were not set + assert "temperature" not in call_kwargs + assert "reasoning_effort" not in call_kwargs + assert "instructions" not in call_kwargs + assert "store" not in call_kwargs + assert "max_completion_tokens" not in call_kwargs + + +# --------------------------------------------------------------------------- +# Call delegation tests +# --------------------------------------------------------------------------- + +class TestAzureResponsesCall: + """Test call / acall delegation to the Responses API.""" + + def test_call_delegates_to_responses(self, mock_openai_completion): + MockCls, instance = mock_openai_completion + comp = _create_azure_responses() + + messages = [{"role": "user", "content": "Hello"}] + result = comp.call(messages=messages, from_task="task1", from_agent="agent1") + + assert result == "responses-result" + instance.call.assert_called_once_with( + messages=messages, + tools=None, + callbacks=None, + available_functions=None, + from_task="task1", + from_agent="agent1", + response_model=None, + ) + + @pytest.mark.asyncio + async def test_acall_delegates_to_responses(self, mock_openai_completion): + MockCls, instance = mock_openai_completion + comp = _create_azure_responses() + + messages = [{"role": "user", "content": "Hello"}] + result = await comp.acall(messages=messages) + + assert result == "async-responses-result" + instance.acall.assert_called_once() + + def test_call_with_tools_delegates(self, mock_openai_completion): + MockCls, instance = mock_openai_completion + comp = _create_azure_responses() + + tools = [{"type": "function", "function": {"name": "test"}}] + available_fns = {"test": lambda: "ok"} + comp.call( + messages="Hello", + tools=tools, + available_functions=available_fns, + ) + + call_kwargs = instance.call.call_args[1] + assert call_kwargs["tools"] == tools + assert call_kwargs["available_functions"] == available_fns + + def test_completions_call_unchanged(self): + """Default api='completions' should not delegate to responses.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + + with patch.object(comp._client, "complete") as mock_complete: + mock_msg = MagicMock() + mock_msg.content = "completions-result" + mock_msg.tool_calls = None + mock_choice = MagicMock() + mock_choice.message = mock_msg + mock_resp = MagicMock() + mock_resp.choices = [mock_choice] + mock_resp.usage = MagicMock( + prompt_tokens=10, completion_tokens=5, total_tokens=15 + ) + mock_resp.usage.prompt_tokens_details = None + mock_complete.return_value = mock_resp + + result = comp.call(messages=[{"role": "user", "content": "Hi"}]) + assert result == "completions-result" + mock_complete.assert_called_once() + + +# --------------------------------------------------------------------------- +# Delegated property & method tests +# --------------------------------------------------------------------------- + +class TestAzureResponsesProperties: + """Test properties and methods delegated to the responses delegate.""" + + def test_last_response_id(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.last_response_id == "resp_abc123" + + def test_last_response_id_none_for_completions(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.last_response_id is None + + def test_last_reasoning_items(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.last_reasoning_items == [{"type": "reasoning"}] + + def test_reset_chain(self, mock_openai_completion): + MockCls, instance = mock_openai_completion + comp = _create_azure_responses() + comp.reset_chain() + instance.reset_chain.assert_called_once() + + def test_reset_reasoning_chain(self, mock_openai_completion): + MockCls, instance = mock_openai_completion + comp = _create_azure_responses() + comp.reset_reasoning_chain() + instance.reset_reasoning_chain.assert_called_once() + + def test_reset_chain_noop_for_completions(self): + """reset_chain should not raise when delegate is None.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", api_key="key", + endpoint="https://res.openai.azure.com", + ) + comp.reset_chain() # should not raise + + +# --------------------------------------------------------------------------- +# Feature-support method tests +# --------------------------------------------------------------------------- + +class TestAzureResponsesFeatures: + """Test supports_* and config methods.""" + + def test_supports_function_calling_responses(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.supports_function_calling() is True + + def test_supports_function_calling_completions_openai_model(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.supports_function_calling() is True + + def test_supports_stop_words_false_for_responses(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.supports_stop_words() is False + + def test_supports_stop_words_true_for_completions_gpt4(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.supports_stop_words() is True + + def test_to_config_dict_includes_responses_fields(self, mock_openai_completion): + MockCls, _ = mock_openai_completion + comp = _create_azure_responses( + reasoning_effort="high", + instructions="Be concise", + store=True, + max_completion_tokens=500, + ) + config = comp.to_config_dict() + assert config["api"] == "responses" + assert config["reasoning_effort"] == "high" + assert config["instructions"] == "Be concise" + assert config["store"] is True + assert config["max_completion_tokens"] == 500 + + def test_to_config_dict_omits_api_for_completions(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", api_key="key", + endpoint="https://res.openai.azure.com", + ) + config = comp.to_config_dict() + assert "api" not in config + + +# --------------------------------------------------------------------------- +# LLM factory integration test +# --------------------------------------------------------------------------- + +class TestAzureResponsesViaLLMFactory: + """Test that the LLM factory passes api='responses' through to AzureCompletion.""" + + @pytest.mark.usefixtures("azure_env") + def test_llm_factory_passes_api_kwarg(self): + """LLM(model='azure/gpt-4o', api='responses') should create AzureCompletion + with api='responses' and a delegate.""" + with patch( + "crewai.llms.providers.openai.completion.OpenAI", + ), patch( + "crewai.llms.providers.openai.completion.AsyncOpenAI", + ): + from crewai.llm import LLM + + llm = LLM(model="azure/gpt-4o", api="responses") + + from crewai.llms.providers.azure.completion import AzureCompletion + assert isinstance(llm, AzureCompletion) + assert llm.api == "responses" + assert llm._responses_delegate is not None From 48d2dfc386f1bec46538f219f0882230ada84462 Mon Sep 17 00:00:00 2001 From: Kunal Karmakar Date: Tue, 31 Mar 2026 17:16:10 +0000 Subject: [PATCH 2/2] Fix linting --- lib/crewai/src/crewai/llms/providers/azure/completion.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index dc278032798..b3419a972d9 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -1230,14 +1230,16 @@ def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]: def last_response_id(self) -> str | None: """Get the last response ID from Responses API auto-chaining.""" if self._responses_delegate is not None: - return self._responses_delegate.last_response_id + result: str | None = self._responses_delegate.last_response_id + return result return None @property def last_reasoning_items(self) -> list[Any] | None: """Get the last reasoning items from Responses API auto-chain reasoning.""" if self._responses_delegate is not None: - return self._responses_delegate.last_reasoning_items + result: list[Any] | None = self._responses_delegate.last_reasoning_items + return result return None def reset_chain(self) -> None: