diff --git a/package.json b/package.json index ab29bf49e..73c669b2e 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "@ai-sdk/mistral": "^3.0.21", "@ai-sdk/openai": "^3.0.37", "@ai-sdk/openai-compatible": "^2.0.31", + "@anthropic-ai/sdk": "^0.78.0", "@aws-sdk/client-s3": "^3.980.0", "@aws-sdk/s3-request-presigner": "^3.980.0", "@chat-adapter/slack": "^4.15.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4a3ca4bd3..a93f801ab 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -93,6 +93,9 @@ importers: '@ai-sdk/openai-compatible': specifier: ^2.0.31 version: 2.0.31(zod@4.3.6) + '@anthropic-ai/sdk': + specifier: ^0.78.0 + version: 0.78.0(zod@4.3.6) '@aws-sdk/client-s3': specifier: ^3.980.0 version: 3.980.0 @@ -1748,6 +1751,15 @@ packages: '@faker-js/faker': ^7.0.0 || ^8.0.0 || ^9.0.0 zod: ^3.21.4 + '@anthropic-ai/sdk@0.78.0': + resolution: {integrity: sha512-PzQhR715td/m1UaaN5hHXjYB8Gl2lF9UVhrrGrZeysiF6Rb74Wc9GCB8hzLdzmQtBd1qe89F9OptgB9Za1Ib5w==} + hasBin: true + peerDependencies: + zod: ^3.25.0 || ^4.0.0 + peerDependenciesMeta: + zod: + optional: true + '@apm-js-collab/code-transformer@0.8.2': resolution: {integrity: sha512-YRjJjNq5KFSjDUoqu5pFUWrrsvGOxl6c3bu+uMFc9HNNptZ2rNU/TI2nLw4jnhQNtka972Ee2m3uqbvDQtPeCA==} @@ -2565,6 +2577,10 @@ packages: resolution: {integrity: sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==} engines: {node: '>=6.9.0'} + '@babel/runtime@7.28.6': + resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==} + engines: {node: '>=6.9.0'} + '@babel/template@7.27.2': resolution: {integrity: sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==} engines: {node: '>=6.9.0'} @@ -9338,6 +9354,10 @@ packages: json-parse-even-better-errors@2.3.1: resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} + json-schema-to-ts@3.1.1: + resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==} + engines: {node: '>=16'} + json-schema-traverse@0.4.1: resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==} @@ -11523,6 +11543,9 @@ packages: trough@2.2.0: resolution: {integrity: sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==} + ts-algebra@2.0.0: + resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==} + ts-api-utils@2.1.0: resolution: {integrity: sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==} engines: {node: '>=18.12'} @@ -12343,6 +12366,12 @@ snapshots: randexp: 0.5.3 zod: 4.3.6 + '@anthropic-ai/sdk@0.78.0(zod@4.3.6)': + dependencies: + json-schema-to-ts: 3.1.1 + optionalDependencies: + zod: 4.3.6 + '@apm-js-collab/code-transformer@0.8.2': {} '@apm-js-collab/tracing-hooks@0.3.1': @@ -13698,6 +13727,8 @@ snapshots: '@babel/runtime@7.28.4': {} + '@babel/runtime@7.28.6': {} + '@babel/template@7.27.2': dependencies: '@babel/code-frame': 7.27.1 @@ -18282,7 +18313,7 @@ snapshots: sirv: 3.0.2 tinyglobby: 0.2.15 tinyrainbow: 2.0.0 - vitest: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.1)(@vitest/ui@3.2.4)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1) + vitest: 3.2.4(@types/debug@4.1.12)(@types/node@25.2.0)(@vitest/ui@3.2.4)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1) '@vitest/utils@3.2.4': dependencies: @@ -21717,6 +21748,11 @@ snapshots: json-parse-even-better-errors@2.3.1: {} + json-schema-to-ts@3.1.1: + dependencies: + '@babel/runtime': 7.28.6 + ts-algebra: 2.0.0 + json-schema-traverse@0.4.1: {} json-schema-traverse@1.0.0: {} @@ -23074,7 +23110,7 @@ snapshots: polished@4.3.1: dependencies: - '@babel/runtime': 7.28.4 + '@babel/runtime': 7.28.6 possible-typed-array-names@1.1.0: {} @@ -24351,6 +24387,8 @@ snapshots: trough@2.2.0: {} + ts-algebra@2.0.0: {} + ts-api-utils@2.1.0(typescript@5.9.3): dependencies: typescript: 5.9.3 diff --git a/src/app/api/openrouter/[...path]/route.ts b/src/app/api/openrouter/[...path]/route.ts index 7aec1569a..9bda7955a 100644 --- a/src/app/api/openrouter/[...path]/route.ts +++ b/src/app/api/openrouter/[...path]/route.ts @@ -7,6 +7,7 @@ import { validateFeatureHeader, FEATURE_HEADER } from '@/lib/feature-detection'; import type { OpenRouterChatCompletionRequest, GatewayResponsesRequest, + GatewayMessagesRequest, GatewayRequest, } from '@/lib/providers/openrouter/types'; import { applyProviderSpecificLogic, getProvider, openRouterRequest } from '@/lib/providers'; @@ -69,6 +70,7 @@ import { applyResolvedAutoModel, isKiloAutoModel } from '@/lib/kilo-auto-model'; import { fixOpenCodeDuplicateReasoning } from '@/lib/providers/fixOpenCodeDuplicateReasoning'; import type { MicrodollarUsageContext, PromptInfo } from '@/lib/processUsage.types'; import { extractResponsesPromptInfo } from '@/lib/processUsage.responses'; +import { extractMessagesPromptInfo } from '@/lib/processUsage.messages'; import { getMaxTokens, hasMiddleOutTransform } from '@/lib/providers/openrouter/request-helpers'; import { isKiloAffiliatedUser } from '@/lib/isKiloAffiliatedUser'; @@ -82,13 +84,17 @@ const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED'; function validatePath( url: URL ): - | { path: '/chat/completions' | '/responses' } + | { path: '/chat/completions' | '/responses' | '/messages' } | { errorResponse: ReturnType } { const pathSuffix = stripRequiredPrefix(url.pathname, '/api/gateway') ?? stripRequiredPrefix(url.pathname, '/api/openrouter'); - if (pathSuffix === '/chat/completions' || pathSuffix === '/responses') { + if ( + pathSuffix === '/chat/completions' || + pathSuffix === '/responses' || + pathSuffix === '/messages' + ) { return { path: pathSuffix }; } return { errorResponse: invalidPathResponse() }; @@ -113,6 +119,9 @@ export async function POST(request: NextRequest): Promise b.text).join('\n') + : null; + const lastUserMessage = request.body.messages.filter(m => m.role === 'user').at(-1); + let userPrompt: string | null = null; + if (lastUserMessage) { + const content = lastUserMessage.content; + if (typeof content === 'string') { + userPrompt = content; + } else if (Array.isArray(content)) { + userPrompt = + content + .filter(c => c.type === 'text') + .map(c => ('text' in c ? c.text : '')) + .join('\n') || null; + } + } + return { systemPrompt: systemPrompt || null, userPrompt }; + } return extractFullPromptsFromChatCompletions(request.body); } diff --git a/src/lib/kilo-auto-model.ts b/src/lib/kilo-auto-model.ts index b66c81e54..e644b815e 100644 --- a/src/lib/kilo-auto-model.ts +++ b/src/lib/kilo-auto-model.ts @@ -214,15 +214,26 @@ export function applyResolvedAutoModel( ) { const resolved = resolveAutoModel(model, modeHeader); request.body.model = resolved.model; - if (resolved.reasoning) request.body.reasoning = resolved.reasoning; - if (resolved.verbosity) { - if (request.kind === 'chat_completions') { - request.body.verbosity = resolved.verbosity as OpenRouterChatCompletionRequest['verbosity']; + if (resolved.reasoning) { + if (request.kind === 'messages') { + request.body.thinking = { type: resolved.reasoning.enabled ? 'adaptive' : 'disabled' }; } else { + request.body.reasoning = resolved.reasoning; + } + } + if (resolved.verbosity) { + if (request.kind === 'messages') { + request.body.output_config = { + ...request.body.output_config, + effort: resolved.verbosity, + }; + } else if (request.kind === 'responses') { request.body.text = { ...request.body.text, verbosity: resolved.verbosity as OpenAI.Responses.ResponseTextConfig['verbosity'], }; + } else { + request.body.verbosity = resolved.verbosity as OpenRouterChatCompletionRequest['verbosity']; } } } diff --git a/src/lib/o11y/api-metrics.server.ts b/src/lib/o11y/api-metrics.server.ts index 886fb21a4..918d40bb8 100644 --- a/src/lib/o11y/api-metrics.server.ts +++ b/src/lib/o11y/api-metrics.server.ts @@ -81,6 +81,13 @@ export function getToolsAvailable(request: GatewayRequest): string[] { }); } + if (request.kind === 'messages') { + return request.body.tools.map((tool): string => { + const name = typeof tool.name === 'string' ? tool.name.trim() : ''; + return name ? `function:${name}` : 'function:unknown'; + }); + } + return request.body.tools.map((tool): string => { if (tool.type === 'function') { const toolName = typeof tool.function?.name === 'string' ? tool.function.name.trim() : ''; @@ -116,6 +123,21 @@ export function getToolsUsed(request: GatewayRequest): string[] { return used; } + if (request.kind === 'messages') { + const used = new Array(); + for (const message of request.body.messages) { + if (message.role !== 'assistant') continue; + const content = Array.isArray(message.content) ? message.content : []; + for (const block of content) { + if (block.type === 'tool_use') { + const name = typeof block.name === 'string' ? block.name.trim() : ''; + used.push(name ? `function:${name}` : 'function:unknown'); + } + } + } + return used; + } + if (!Array.isArray(request.body.messages)) return []; const used = new Array(); diff --git a/src/lib/processUsage.messages.ts b/src/lib/processUsage.messages.ts new file mode 100644 index 000000000..5ebed2e44 --- /dev/null +++ b/src/lib/processUsage.messages.ts @@ -0,0 +1,267 @@ +import { createParser, type EventSourceMessage } from 'eventsource-parser'; +import { captureException, captureMessage, startInactiveSpan } from '@sentry/nextjs'; +import type { Span } from '@sentry/nextjs'; +import { toMicrodollars } from './utils'; +import { sentryRootSpan } from './getRootSpan'; +import type { ProviderId } from '@/lib/providers/provider-id'; +import type { + JustTheCostsUsageStats, + MicrodollarUsageStats, + NotYetCostedUsageStats, + PromptInfo, +} from '@/lib/processUsage.types'; +import type { GatewayMessagesRequest } from '@/lib/providers/openrouter/types'; +import { OPENROUTER_BYOK_COST_MULTIPLIER } from '@/lib/processUsage.constants'; +import type Anthropic from '@anthropic-ai/sdk'; + +// ref: https://docs.anthropic.com/en/api/messages +// Anthropic usage combined with OpenRouter cost fields +// ref: https://docs.anthropic.com/en/api/messages +// ref: https://openrouter.ai/docs/use-cases/usage-accounting#response-format +type MessagesApiUsage = Anthropic.Messages.Usage & { + cost?: number; + is_byok?: boolean | null; + cost_details?: { upstream_inference_cost: number }; +}; + +function processMessagesApiUsage( + usage: MessagesApiUsage | null | undefined, + coreProps: NotYetCostedUsageStats +): JustTheCostsUsageStats { + const inputTokens = usage?.input_tokens ?? 0; + const outputTokens = usage?.output_tokens ?? 0; + const cacheHitTokens = usage?.cache_read_input_tokens ?? 0; + const cacheWriteTokens = usage?.cache_creation_input_tokens ?? 0; + + // OpenRouter path: cost fields are present directly in usage + if (usage?.cost != null || usage?.is_byok != null) { + const is_byok = usage.is_byok ?? null; + const openrouterCost_USD = usage.cost ?? 0; + const upstream_inference_cost_USD = usage.cost_details?.upstream_inference_cost ?? 0; + const cost_mUsd = toMicrodollars(is_byok ? upstream_inference_cost_USD : openrouterCost_USD); + const inferredUpstream_USD = openrouterCost_USD * OPENROUTER_BYOK_COST_MULTIPLIER; + const microdollar_error = (inferredUpstream_USD - upstream_inference_cost_USD) * 1000000; + if ( + (is_byok == null && (openrouterCost_USD || upstream_inference_cost_USD)) || + (is_byok && usage.cost !== 0 && 1.1 < Math.abs(microdollar_error)) + ) { + const { responseContent: _ignore, ...corePropsCopy } = coreProps; + captureMessage("SUSPICIOUS: openrouters cost accounting doesn't make sense", { + level: 'error', + tags: { source: 'messages_sse_processing' }, + extra: { + ...corePropsCopy, + cost_mUsd, + is_byok, + openrouterCost_USD, + upstream_inference_cost_USD, + inferredUpstream_USD, + microdollar_error, + }, + }); + } + return { inputTokens, outputTokens, cacheHitTokens, cacheWriteTokens, cost_mUsd, is_byok }; + } + + // No cost info available + return { + inputTokens, + outputTokens, + cacheHitTokens, + cacheWriteTokens, + cost_mUsd: 0, + is_byok: null, + }; +} + +export async function parseMessagesMicrodollarUsageFromStream( + stream: ReadableStream, + kiloUserId: string, + openrouterRequestSpan: Span | undefined, + provider: ProviderId, + statusCode: number +): Promise { + openrouterRequestSpan?.end(); + const streamProcessingSpan = startInactiveSpan({ + name: 'messages-stream-processing', + op: 'performance', + }); + const timeToFirstTokenSpan = startInactiveSpan({ + name: 'time-to-first-token', + op: 'performance', + }); + + let messageId: string | null = null; + let model: string | null = null; + let responseContent = ''; + const reportedError = statusCode >= 400; + const startedAt = performance.now(); + let firstTokenReceived = false; + let inputUsage: MessagesApiUsage | null = null; + let outputTokens = 0; + let finish_reason: string | null = null; + + const reader = stream.getReader(); + const decoder = new TextDecoder(); + + const sseStreamParser = createParser({ + onEvent(event: EventSourceMessage) { + if (!firstTokenReceived) { + sentryRootSpan()?.setAttribute( + 'messages.time_to_first_token_ms', + performance.now() - startedAt + ); + firstTokenReceived = true; + timeToFirstTokenSpan.end(); + } + + const json = JSON.parse(event.data) as Anthropic.Messages.MessageStreamEvent; + + if (!json) { + captureException(new Error('SUSPICIOUS: No JSON in SSE event'), { + extra: { event }, + }); + return; + } + + //if (json.type === 'error') { + // reportedError = true; + // captureException(new Error(`Messages API error: ${json.error.message}`), { + // tags: { source: 'messages_sse_processing' }, + // extra: { json, event }, + // }); + // return; + //} + + if (json.type === 'message_start') { + messageId = json.message.id; + model = json.message.model; + inputUsage = json.message.usage; + } + + if ( + json.type === 'content_block_delta' && + json.delta.type === 'text_delta' && + json.delta.text + ) { + responseContent += json.delta.text; + } + + if (json.type === 'message_delta') { + finish_reason = json.delta.stop_reason; + outputTokens = json.usage.output_tokens; + } + }, + }); + + let wasAborted = false; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + sseStreamParser.feed(decoder.decode(value, { stream: true })); + } + } catch (error) { + if (error instanceof Error && error.name === 'ResponseAborted') { + wasAborted = true; + } else { + throw error; + } + } finally { + reader.releaseLock(); + streamProcessingSpan.end(); + } + + if (!reportedError && !inputUsage) { + captureMessage('SUSPICIOUS: No usage in Messages API stream', { + level: 'warning', + tags: { source: 'messages_usage_processing' }, + extra: { kiloUserId, provider, messageId, model }, + }); + } + + // Merge input and output usage together + const usage: MessagesApiUsage | null = + inputUsage !== null ? Object.assign({}, inputUsage, { output_tokens: outputTokens }) : null; + + const coreProps = { + messageId, + hasError: reportedError || wasAborted, + model, + responseContent, + inference_provider: null, + finish_reason, + upstream_id: null, + latency: null, + moderation_latency: null, + generation_time: null, + streamed: true, + cancelled: null, + } satisfies NotYetCostedUsageStats; + + const costs = processMessagesApiUsage(usage, coreProps); + return { ...coreProps, ...costs }; +} + +export function parseMessagesMicrodollarUsageFromString( + fullResponse: string, + statusCode: number +): MicrodollarUsageStats { + const responseJson = JSON.parse(fullResponse) as Anthropic.Messages.Message | null; + + const usage = responseJson?.usage; + + const responseContent = + responseJson?.content + .filter(c => c.type === 'text') + .map(c => c.text ?? '') + .join('') ?? ''; + + const coreProps = { + messageId: responseJson?.id ?? null, + hasError: !responseJson?.model || statusCode >= 400, + model: responseJson?.model ?? null, + responseContent, + inference_provider: null, + upstream_id: null, + finish_reason: responseJson?.stop_reason ?? null, + latency: null, + moderation_latency: null, + generation_time: null, + streamed: false, + cancelled: null, + } satisfies NotYetCostedUsageStats; + + const costs = processMessagesApiUsage(usage, coreProps); + return { ...coreProps, ...costs }; +} + +export function extractMessagesPromptInfo(body: GatewayMessagesRequest): PromptInfo { + const systemContent = + typeof body.system === 'string' + ? body.system + : Array.isArray(body.system) + ? body.system.map(b => b.text).join('\n') + : ''; + + const lastUserMessage = body.messages.filter(m => m.role === 'user').at(-1); + + let userPrompt = ''; + if (lastUserMessage) { + const content = lastUserMessage.content; + if (typeof content === 'string') { + userPrompt = content; + } else { + userPrompt = content + .filter((c): c is { type: 'text'; text: string } => c.type === 'text') + .map(c => c.text) + .join('\n'); + } + } + + return { + system_prompt_prefix: systemContent.slice(0, 100), + system_prompt_length: systemContent.length, + user_prompt_prefix: userPrompt.slice(0, 100), + }; +} diff --git a/src/lib/processUsage.ts b/src/lib/processUsage.ts index 574d55399..df0d64ca0 100644 --- a/src/lib/processUsage.ts +++ b/src/lib/processUsage.ts @@ -48,6 +48,10 @@ import { parseResponsesMicrodollarUsageFromStream, parseResponsesMicrodollarUsageFromString, } from '@/lib/processUsage.responses'; +import { + parseMessagesMicrodollarUsageFromStream, + parseMessagesMicrodollarUsageFromString, +} from '@/lib/processUsage.messages'; import { OPENROUTER_BYOK_COST_MULTIPLIER } from '@/lib/processUsage.constants'; const posthogClient = PostHogClient(); @@ -568,6 +572,21 @@ export function countAndStoreUsage( ) ); } + if (usageContext.api_kind === 'messages') { + usageStatsPromise = usageContext.isStreaming + ? parseMessagesMicrodollarUsageFromStream( + clonedReponse.body, + usageContext.kiloUserId, + openrouterRequestSpan, + usageContext.provider, + clonedReponse.status + ) + : clonedReponse + .text() + .then(content => + parseMessagesMicrodollarUsageFromString(content, clonedReponse.status) + ); + } } return usageStatsPromise.then(usageStats => processTokenData(usageStats, usageContext)); diff --git a/src/lib/providers/anthropic.ts b/src/lib/providers/anthropic.ts index e2cdad86f..f039e1e26 100644 --- a/src/lib/providers/anthropic.ts +++ b/src/lib/providers/anthropic.ts @@ -87,8 +87,11 @@ export function applyAnthropicModelSettings( // additionally it is a common bug to forget adding cache breakpoints // we may want to gate this for Kilo-clients at some point if (requestToMutate.kind === 'chat_completions') { - //todo: figure out whether this is possible before making responses generally available addCacheBreakpoints(requestToMutate.body.messages); + } else if (requestToMutate.kind === 'messages') { + if (!requestToMutate.body.cache_control && requestToMutate.body.messages.length > 1) { + requestToMutate.body.cache_control = { type: 'ephemeral' }; + } } } diff --git a/src/lib/providers/index.ts b/src/lib/providers/index.ts index a618e6e9c..2caad6c5f 100644 --- a/src/lib/providers/index.ts +++ b/src/lib/providers/index.ts @@ -7,6 +7,7 @@ import type { OpenRouterChatCompletionRequest, OpenRouterGeneration, GatewayRequest, + GatewayMessagesRequest, } from '@/lib/providers/openrouter/types'; import { applyMistralModelSettings, @@ -118,7 +119,7 @@ async function checkBYOK( export async function getProvider( requestedModel: string, - request: OpenRouterChatCompletionRequest | GatewayResponsesRequest, + request: OpenRouterChatCompletionRequest | GatewayResponsesRequest | GatewayMessagesRequest, user: User | AnonymousUserContext, organizationId: string | undefined, taskId: string | undefined @@ -255,7 +256,10 @@ function getPreferredProviderOrder(requestedModel: string): string[] { function applyPreferredProvider( requestedModel: string, - requestToMutate: OpenRouterChatCompletionRequest | GatewayResponsesRequest + requestToMutate: + | OpenRouterChatCompletionRequest + | GatewayResponsesRequest + | GatewayMessagesRequest ) { const preferredProviderOrder = getPreferredProviderOrder(requestedModel); if (preferredProviderOrder.length === 0) { @@ -347,7 +351,7 @@ export async function openRouterRequest({ path: string; search: string; method: string; - body: OpenRouterChatCompletionRequest | GatewayResponsesRequest; + body: OpenRouterChatCompletionRequest | GatewayResponsesRequest | GatewayMessagesRequest; extraHeaders: Record; provider: Provider; signal?: AbortSignal; diff --git a/src/lib/providers/openrouter/request-helpers.ts b/src/lib/providers/openrouter/request-helpers.ts index fdc7b53bb..6a64a9572 100644 --- a/src/lib/providers/openrouter/request-helpers.ts +++ b/src/lib/providers/openrouter/request-helpers.ts @@ -1,9 +1,13 @@ import type { GatewayRequest } from '@/lib/providers/openrouter/types'; export function getMaxTokens(request: GatewayRequest) { - return request.kind === 'chat_completions' - ? (request.body.max_completion_tokens ?? request.body.max_tokens ?? null) - : (request.body.max_output_tokens ?? null); + if (request.kind === 'responses') { + return request.body.max_output_tokens ?? null; + } + if (request.kind === 'messages') { + return request.body.max_tokens ?? null; + } + return request.body.max_completion_tokens ?? request.body.max_tokens ?? null; } export function hasMiddleOutTransform(request: GatewayRequest) { diff --git a/src/lib/providers/openrouter/types.ts b/src/lib/providers/openrouter/types.ts index a751602aa..88ec87579 100644 --- a/src/lib/providers/openrouter/types.ts +++ b/src/lib/providers/openrouter/types.ts @@ -3,6 +3,7 @@ import type { GatewayProviderOptions } from '@ai-sdk/gateway'; import type { AnthropicProviderOptions } from '@ai-sdk/anthropic'; import type { ReasoningDetailUnion } from '@/lib/custom-llm/reasoning-details'; import type { AwsCredentials } from '@/lib/providers/openrouter/inference-provider-id'; +import type Anthropic from '@anthropic-ai/sdk'; // Base types for OpenRouter API that don't depend on other lib files // This breaks circular dependencies with mistral.ts, minimax.ts, etc. @@ -56,12 +57,18 @@ export type SharedGatewayRequestProperties = { // https://openrouter.ai/docs/api/api-reference/chat/send-chat-completion-request#request.body.models models?: string[]; - thinking?: { type?: 'enabled' | 'disabled' }; + thinking?: { type?: 'enabled' | 'adaptive' | 'disabled' }; }; export type GatewayResponsesRequest = SharedGatewayRequestProperties & OpenAI.Responses.ResponseCreateParams; +export type GatewayMessagesRequest = SharedGatewayRequestProperties & + Anthropic.MessageCreateParams & { + user?: string; + session_id?: string; + }; + /** * Approximately OpenRouter API request type. Actually based on OpenAI's, but the differences aren't huge. */ @@ -86,7 +93,8 @@ export type MessageWithReasoning = { export type GatewayRequest = | { kind: 'chat_completions'; body: OpenRouterChatCompletionRequest } - | { kind: 'responses'; body: GatewayResponsesRequest }; + | { kind: 'responses'; body: GatewayResponsesRequest } + | { kind: 'messages'; body: GatewayMessagesRequest }; export type OpenRouterGeneration = { data: { diff --git a/src/lib/providers/vercel/index.ts b/src/lib/providers/vercel/index.ts index 6024de950..3efb6e731 100644 --- a/src/lib/providers/vercel/index.ts +++ b/src/lib/providers/vercel/index.ts @@ -18,6 +18,7 @@ import type { VercelProviderConfig, OpenRouterChatCompletionRequest, GatewayResponsesRequest, + GatewayMessagesRequest, } from '@/lib/providers/openrouter/types'; import { mapModelIdToVercel } from '@/lib/providers/vercel/mapModelIdToVercel'; import { isZaiModel } from '@/lib/providers/zai'; @@ -57,7 +58,7 @@ function isLikelyAvailableOnAllGateways(requestedModel: string) { export async function shouldRouteToVercel( requestedModel: string, - request: OpenRouterChatCompletionRequest | GatewayResponsesRequest, + request: OpenRouterChatCompletionRequest | GatewayResponsesRequest | GatewayMessagesRequest, randomSeed: string ) { if (request.provider?.data_collection === 'deny') { diff --git a/src/lib/providers/xai.ts b/src/lib/providers/xai.ts index 0807839fb..f7e784b5c 100644 --- a/src/lib/providers/xai.ts +++ b/src/lib/providers/xai.ts @@ -24,11 +24,16 @@ export function applyXaiModelSettings( requestToMutate: GatewayRequest, extraHeaders: Record ) { - if (requestedModel === grok_code_fast_1_optimized_free_model.public_id) { + if ( + requestedModel === grok_code_fast_1_optimized_free_model.public_id && + requestToMutate.kind === 'chat_completions' + ) { delete requestToMutate.body.reasoning; } - // https://kilo-code.slack.com/archives/C09922UFQHF/p1767968746782459 - extraHeaders['x-grok-conv-id'] = requestToMutate.body.prompt_cache_key || crypto.randomUUID(); - extraHeaders['x-grok-req-id'] = crypto.randomUUID(); + if (requestToMutate.kind === 'chat_completions' || requestToMutate.kind === 'responses') { + // https://kilo-code.slack.com/archives/C09922UFQHF/p1767968746782459 + extraHeaders['x-grok-conv-id'] = requestToMutate.body.prompt_cache_key || crypto.randomUUID(); + extraHeaders['x-grok-req-id'] = crypto.randomUUID(); + } }