Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 169 additions & 16 deletions lib/crewai/src/crewai/llms/providers/azure/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,22 @@ class AzureCompletion(BaseLLM):
# True when the deployed model is an OpenAI model (gates function-calling support).
is_openai_model: bool = False
# True when the configured endpoint is an Azure OpenAI endpoint — TODO confirm
# exact detection rule; set by validation logic outside this view.
is_azure_openai_endpoint: bool = False

# Responses API settings
# Which API surface to use: classic Chat Completions, or the Responses API
# (the latter is routed through an OpenAI-SDK delegate, see _init_responses_delegate).
api: Literal["completions", "responses"] = "completions"
# Reasoning effort forwarded to the Responses delegate when set.
reasoning_effort: str | None = None
# System-style instructions forwarded to the Responses delegate when set.
instructions: str | None = None
# Whether the service should store responses (Responses API); forwarded when set.
store: bool | None = None
# ID of a prior response to continue from (Responses API chaining); forwarded when set.
previous_response_id: str | None = None
# Extra output fields to include in Responses API results; forwarded when set.
include: list[str] | None = None
# Built-in tool names to enable on the Responses API; forwarded when set.
builtin_tools: list[str] | None = None
# Forwarded to the delegate only when truthy.
parse_tool_outputs: bool = False
# Enable automatic response chaining in the delegate; forwarded only when truthy.
auto_chain: bool = False
# Carry reasoning items across auto-chained requests; forwarded only when truthy.
auto_chain_reasoning: bool = False
# Token-generation cap (newer models' naming); forwarded when set.
max_completion_tokens: int | None = None

# Azure AI Inference clients — populated only on the Chat Completions path.
_client: Any = PrivateAttr(default=None)
_async_client: Any = PrivateAttr(default=None)
# OpenAICompletion delegate — populated only when api == "responses".
_responses_delegate: Any = PrivateAttr(default=None)

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -143,17 +157,95 @@ def _normalize_azure_fields(cls, data: Any) -> Any:
def _init_clients(self) -> AzureCompletion:
    """Create the API client(s) appropriate for the selected API surface.

    For the Responses API, a single OpenAI-SDK delegate is created; for
    Chat Completions, sync and async Azure AI Inference clients are built.

    Returns:
        This instance (validator chaining).

    Raises:
        ValueError: If no API key is configured.
    """
    if not self.api_key:
        raise ValueError("Azure API key is required.")

    if self.api == "responses":
        # Responses API requests go through an OpenAI-SDK delegate, not
        # the Azure AI Inference clients.
        self._init_responses_delegate()
        return self

    shared_kwargs: dict[str, Any] = {
        "endpoint": self.endpoint,
        "credential": AzureKeyCredential(self.api_key),
    }
    # Only pass api_version when explicitly configured.
    if self.api_version:
        shared_kwargs["api_version"] = self.api_version

    self._client = ChatCompletionsClient(**shared_kwargs)
    self._async_client = AsyncChatCompletionsClient(**shared_kwargs)
    return self

def _init_responses_delegate(self) -> None:
    """Create an OpenAICompletion delegate for the Azure OpenAI Responses API.

    The Azure OpenAI Responses API uses the standard OpenAI Python SDK
    with a base_url pointing to the Azure resource's /openai/v1/ endpoint.
    """
    from crewai.llms.providers.openai.completion import OpenAICompletion

    delegate_kwargs: dict[str, Any] = {
        "model": self.model,
        "api_key": self.api_key,
        "base_url": self._get_responses_base_url(),
        "api": "responses",
        "provider": "openai",
        "stream": self.stream,
    }

    # Settings forwarded only when explicitly set (None means "use the
    # delegate's own default").
    forwarded_if_set = (
        "temperature",
        "top_p",
        "max_tokens",
        "max_completion_tokens",
        "timeout",
        "reasoning_effort",
        "instructions",
        "store",
        "previous_response_id",
        "include",
        "builtin_tools",
        "response_format",
    )
    for field_name in forwarded_if_set:
        field_value = getattr(self, field_name)
        if field_value is not None:
            delegate_kwargs[field_name] = field_value

    # Settings forwarded only when truthy (empty/False falls back to the
    # delegate's default).
    forwarded_if_truthy = (
        "stop",
        "parse_tool_outputs",
        "auto_chain",
        "auto_chain_reasoning",
        "additional_params",
    )
    for field_name in forwarded_if_truthy:
        field_value = getattr(self, field_name)
        if field_value:
            delegate_kwargs[field_name] = field_value

    # Only override the retry count when it differs from the default (2).
    if self.max_retries != 2:
        delegate_kwargs["max_retries"] = self.max_retries

    self._responses_delegate = OpenAICompletion(**delegate_kwargs)

def _get_responses_base_url(self) -> str:
"""Construct the base URL for the Azure OpenAI Responses API.

Extracts the scheme and host from the configured endpoint and appends
the ``/openai/v1/`` path required by the Azure OpenAI Responses API.

Returns:
The Responses API base URL, e.g.
``https://myresource.openai.azure.com/openai/v1/``
"""
if not self.endpoint:
raise ValueError("Azure endpoint is required for Responses API")
parsed = urlparse(self.endpoint)
base = f"{parsed.scheme}://{parsed.netloc}"
return f"{base}/openai/v1/"

def to_config_dict(self) -> dict[str, Any]:
"""Extend base config with Azure-specific fields."""
config = super().to_config_dict()
Expand All @@ -173,6 +265,16 @@ def to_config_dict(self) -> dict[str, Any]:
config["presence_penalty"] = self.presence_penalty
if self.max_tokens is not None:
config["max_tokens"] = self.max_tokens
if self.api != "completions":
config["api"] = self.api
if self.reasoning_effort is not None:
config["reasoning_effort"] = self.reasoning_effort
if self.instructions is not None:
config["instructions"] = self.instructions
if self.store is not None:
config["store"] = self.store
if self.max_completion_tokens is not None:
config["max_completion_tokens"] = self.max_completion_tokens
return config

@staticmethod
Expand Down Expand Up @@ -278,10 +380,10 @@ def call(
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API (Chat Completions or Responses).

Args:
messages: Input messages for the chat completion
messages: Input messages for the completion
tools: List of tool/function definitions
callbacks: Callback functions (not used in native implementation)
available_functions: Available functions for tool calling
Expand All @@ -290,8 +392,19 @@ def call(
response_model: Response model

Returns:
Chat completion response or tool call result
Completion response or tool call result
"""
if self.api == "responses":
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)

with llm_call_context():
try:
# Emit call started event
Expand Down Expand Up @@ -350,10 +463,10 @@ async def acall( # type: ignore[return]
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).

Args:
messages: Input messages for the chat completion
messages: Input messages for the completion
tools: List of tool/function definitions
callbacks: Callback functions (not used in native implementation)
available_functions: Available functions for tool calling
Expand All @@ -362,8 +475,19 @@ async def acall( # type: ignore[return]
response_model: Pydantic model for structured output

Returns:
Chat completion response or tool call result
Completion response or tool call result
"""
if self.api == "responses":
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)

with llm_call_context():
try:
self._emit_call_started_event(
Expand Down Expand Up @@ -1013,7 +1137,8 @@ async def _ahandle_streaming_completion(

def supports_function_calling(self) -> bool:
"""Check if the model supports function calling."""
# Azure OpenAI models support function calling
if self.api == "responses":
return True
return self.is_openai_model

def supports_stop_words(self) -> bool:
Expand All @@ -1023,6 +1148,8 @@ def supports_stop_words(self) -> bool:
computer-use-preview) do not support stop sequences.
See: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure
"""
if self.api == "responses":
return False
model_lower = self.model.lower() if self.model else ""

if "gpt-5" in model_lower:
Expand Down Expand Up @@ -1091,13 +1218,39 @@ def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]:
}
return {"total_tokens": 0}

@property
def last_response_id(self) -> str | None:
"""Get the last response ID from Responses API auto-chaining."""
if self._responses_delegate is not None:
result: str | None = self._responses_delegate.last_response_id
return result
return None

@property
def last_reasoning_items(self) -> list[Any] | None:
"""Get the last reasoning items from Responses API auto-chain reasoning."""
if self._responses_delegate is not None:
result: list[Any] | None = self._responses_delegate.last_reasoning_items
return result
return None

def reset_chain(self) -> None:
"""Reset the Responses API auto-chain state."""
if self._responses_delegate is not None:
self._responses_delegate.reset_chain()

def reset_reasoning_chain(self) -> None:
"""Reset the Responses API reasoning chain state."""
if self._responses_delegate is not None:
self._responses_delegate.reset_reasoning_chain()

async def aclose(self) -> None:
"""Close the async client and clean up resources.

This ensures proper cleanup of the underlying aiohttp session
to avoid unclosed connector warnings.
"""
if hasattr(self._async_client, "close"):
if self._async_client and hasattr(self._async_client, "close"):
await self._async_client.close()

async def __aenter__(self) -> Self:
Expand Down
Loading