Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 169 additions & 16 deletions lib/crewai/src/crewai/llms/providers/azure/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,22 @@ class AzureCompletion(BaseLLM):
# True when the deployed model is an OpenAI model (gates function-calling support).
is_openai_model: bool = False
# True when the configured endpoint is an Azure OpenAI endpoint — TODO confirm
# exact detection rule; set by validation logic outside this view.
is_azure_openai_endpoint: bool = False

# Responses API settings
# Which API surface to use: classic Chat Completions, or the Responses API
# (the latter is routed through an OpenAI-SDK delegate, see _init_responses_delegate).
api: Literal["completions", "responses"] = "completions"
# Reasoning effort forwarded to the Responses delegate when set.
reasoning_effort: str | None = None
# System-style instructions forwarded to the Responses delegate when set.
instructions: str | None = None
# Whether the service should store responses (Responses API); forwarded when set.
store: bool | None = None
# ID of a prior response to continue from (Responses API chaining); forwarded when set.
previous_response_id: str | None = None
# Extra output fields to include in Responses API results; forwarded when set.
include: list[str] | None = None
# Built-in tool names to enable on the Responses API; forwarded when set.
builtin_tools: list[str] | None = None
# Forwarded to the delegate only when truthy.
parse_tool_outputs: bool = False
# Enable automatic response chaining in the delegate; forwarded only when truthy.
auto_chain: bool = False
# Carry reasoning items across auto-chained requests; forwarded only when truthy.
auto_chain_reasoning: bool = False
# Token-generation cap (newer models' naming); forwarded when set.
max_completion_tokens: int | None = None

# Azure AI Inference clients — populated only on the Chat Completions path.
_client: Any = PrivateAttr(default=None)
_async_client: Any = PrivateAttr(default=None)
# OpenAICompletion delegate — populated only when api == "responses".
_responses_delegate: Any = PrivateAttr(default=None)

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -143,17 +157,95 @@ def _normalize_azure_fields(cls, data: Any) -> Any:
def _init_clients(self) -> AzureCompletion:
    """Create the API client(s) appropriate for the selected API surface.

    For the Responses API, a single OpenAI-SDK delegate is created; for
    Chat Completions, sync and async Azure AI Inference clients are built.

    Returns:
        This instance (validator chaining).

    Raises:
        ValueError: If no API key is configured.
    """
    if not self.api_key:
        raise ValueError("Azure API key is required.")

    if self.api == "responses":
        # Responses API requests go through an OpenAI-SDK delegate, not
        # the Azure AI Inference clients.
        self._init_responses_delegate()
        return self

    shared_kwargs: dict[str, Any] = {
        "endpoint": self.endpoint,
        "credential": AzureKeyCredential(self.api_key),
    }
    # Only pass api_version when explicitly configured.
    if self.api_version:
        shared_kwargs["api_version"] = self.api_version

    self._client = ChatCompletionsClient(**shared_kwargs)
    self._async_client = AsyncChatCompletionsClient(**shared_kwargs)
    return self

def _init_responses_delegate(self) -> None:
    """Create an OpenAICompletion delegate for the Azure OpenAI Responses API.

    The Azure OpenAI Responses API uses the standard OpenAI Python SDK
    with a base_url pointing to the Azure resource's /openai/v1/ endpoint.
    """
    from crewai.llms.providers.openai.completion import OpenAICompletion

    delegate_kwargs: dict[str, Any] = {
        "model": self.model,
        "api_key": self.api_key,
        "base_url": self._get_responses_base_url(),
        "api": "responses",
        "provider": "openai",
        "stream": self.stream,
    }

    # Settings forwarded only when explicitly set (None means "use the
    # delegate's own default").
    forwarded_if_set = (
        "temperature",
        "top_p",
        "max_tokens",
        "max_completion_tokens",
        "timeout",
        "reasoning_effort",
        "instructions",
        "store",
        "previous_response_id",
        "include",
        "builtin_tools",
        "response_format",
    )
    for field_name in forwarded_if_set:
        field_value = getattr(self, field_name)
        if field_value is not None:
            delegate_kwargs[field_name] = field_value

    # Settings forwarded only when truthy (empty/False falls back to the
    # delegate's default).
    forwarded_if_truthy = (
        "stop",
        "parse_tool_outputs",
        "auto_chain",
        "auto_chain_reasoning",
        "additional_params",
    )
    for field_name in forwarded_if_truthy:
        field_value = getattr(self, field_name)
        if field_value:
            delegate_kwargs[field_name] = field_value

    # Only override the retry count when it differs from the default (2).
    if self.max_retries != 2:
        delegate_kwargs["max_retries"] = self.max_retries

    self._responses_delegate = OpenAICompletion(**delegate_kwargs)

def _get_responses_base_url(self) -> str:
"""Construct the base URL for the Azure OpenAI Responses API.

Extracts the scheme and host from the configured endpoint and appends
the ``/openai/v1/`` path required by the Azure OpenAI Responses API.

Returns:
The Responses API base URL, e.g.
``https://myresource.openai.azure.com/openai/v1/``
"""
if not self.endpoint:
raise ValueError("Azure endpoint is required for Responses API")
parsed = urlparse(self.endpoint)
base = f"{parsed.scheme}://{parsed.netloc}"
return f"{base}/openai/v1/"

def to_config_dict(self) -> dict[str, Any]:
"""Extend base config with Azure-specific fields."""
config = super().to_config_dict()
Expand All @@ -173,6 +265,16 @@ def to_config_dict(self) -> dict[str, Any]:
config["presence_penalty"] = self.presence_penalty
if self.max_tokens is not None:
config["max_tokens"] = self.max_tokens
if self.api != "completions":
config["api"] = self.api
if self.reasoning_effort is not None:
config["reasoning_effort"] = self.reasoning_effort
if self.instructions is not None:
config["instructions"] = self.instructions
if self.store is not None:
config["store"] = self.store
if self.max_completion_tokens is not None:
config["max_completion_tokens"] = self.max_completion_tokens
return config

@staticmethod
Expand Down Expand Up @@ -278,10 +380,10 @@ def call(
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API (Chat Completions or Responses).

Args:
messages: Input messages for the chat completion
messages: Input messages for the completion
tools: List of tool/function definitions
callbacks: Callback functions (not used in native implementation)
available_functions: Available functions for tool calling
Expand All @@ -290,8 +392,19 @@ def call(
response_model: Response model

Returns:
Chat completion response or tool call result
Completion response or tool call result
"""
if self.api == "responses":
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)

with llm_call_context():
try:
# Emit call started event
Expand Down Expand Up @@ -350,10 +463,10 @@ async def acall( # type: ignore[return]
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).

Args:
messages: Input messages for the chat completion
messages: Input messages for the completion
tools: List of tool/function definitions
callbacks: Callback functions (not used in native implementation)
available_functions: Available functions for tool calling
Expand All @@ -362,8 +475,19 @@ async def acall( # type: ignore[return]
response_model: Pydantic model for structured output

Returns:
Chat completion response or tool call result
Completion response or tool call result
"""
if self.api == "responses":
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)

with llm_call_context():
try:
self._emit_call_started_event(
Expand Down Expand Up @@ -1013,7 +1137,8 @@ async def _ahandle_streaming_completion(

def supports_function_calling(self) -> bool:
"""Check if the model supports function calling."""
# Azure OpenAI models support function calling
if self.api == "responses":
return True
return self.is_openai_model

def supports_stop_words(self) -> bool:
Expand All @@ -1023,6 +1148,8 @@ def supports_stop_words(self) -> bool:
computer-use-preview) do not support stop sequences.
See: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure
"""
if self.api == "responses":
return False
model_lower = self.model.lower() if self.model else ""

if "gpt-5" in model_lower:
Expand Down Expand Up @@ -1091,13 +1218,39 @@ def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]:
}
return {"total_tokens": 0}

@property
def last_response_id(self) -> str | None:
"""Get the last response ID from Responses API auto-chaining."""
if self._responses_delegate is not None:
result: str | None = self._responses_delegate.last_response_id
return result
return None

@property
def last_reasoning_items(self) -> list[Any] | None:
"""Get the last reasoning items from Responses API auto-chain reasoning."""
if self._responses_delegate is not None:
result: list[Any] | None = self._responses_delegate.last_reasoning_items
return result
return None

def reset_chain(self) -> None:
"""Reset the Responses API auto-chain state."""
if self._responses_delegate is not None:
self._responses_delegate.reset_chain()

def reset_reasoning_chain(self) -> None:
"""Reset the Responses API reasoning chain state."""
if self._responses_delegate is not None:
self._responses_delegate.reset_reasoning_chain()

async def aclose(self) -> None:
"""Close the async client and clean up resources.

This ensures proper cleanup of the underlying aiohttp session
to avoid unclosed connector warnings.
"""
if hasattr(self._async_client, "close"):
if self._async_client and hasattr(self._async_client, "close"):
await self._async_client.close()

async def __aenter__(self) -> Self:
Expand Down
Loading