Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"@ai-sdk/mistral": "^3.0.21",
"@ai-sdk/openai": "^3.0.37",
"@ai-sdk/openai-compatible": "^2.0.31",
"@anthropic-ai/sdk": "^0.78.0",
"@aws-sdk/client-s3": "^3.980.0",
"@aws-sdk/s3-request-presigner": "^3.980.0",
"@chat-adapter/slack": "^4.15.0",
Expand Down
42 changes: 40 additions & 2 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 43 additions & 17 deletions src/app/api/openrouter/[...path]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { validateFeatureHeader, FEATURE_HEADER } from '@/lib/feature-detection';
import type {
OpenRouterChatCompletionRequest,
GatewayResponsesRequest,
GatewayMessagesRequest,
GatewayRequest,
} from '@/lib/providers/openrouter/types';
import { applyProviderSpecificLogic, getProvider, openRouterRequest } from '@/lib/providers';
Expand Down Expand Up @@ -69,6 +70,7 @@ import { applyResolvedAutoModel, isKiloAutoModel } from '@/lib/kilo-auto-model';
import { fixOpenCodeDuplicateReasoning } from '@/lib/providers/fixOpenCodeDuplicateReasoning';
import type { MicrodollarUsageContext, PromptInfo } from '@/lib/processUsage.types';
import { extractResponsesPromptInfo } from '@/lib/processUsage.responses';
import { extractMessagesPromptInfo } from '@/lib/processUsage.messages';
import { getMaxTokens, hasMiddleOutTransform } from '@/lib/providers/openrouter/request-helpers';
import { isKiloAffiliatedUser } from '@/lib/isKiloAffiliatedUser';

Expand All @@ -82,13 +84,17 @@ const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED';
function validatePath(
url: URL
):
| { path: '/chat/completions' | '/responses' }
| { path: '/chat/completions' | '/responses' | '/messages' }
| { errorResponse: ReturnType<typeof invalidPathResponse> } {
const pathSuffix =
stripRequiredPrefix(url.pathname, '/api/gateway') ??
stripRequiredPrefix(url.pathname, '/api/openrouter');

if (pathSuffix === '/chat/completions' || pathSuffix === '/responses') {
if (
pathSuffix === '/chat/completions' ||
pathSuffix === '/responses' ||
pathSuffix === '/messages'
) {
return { path: pathSuffix };
}
return { errorResponse: invalidPathResponse() };
Expand All @@ -113,6 +119,9 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
// Inject or merge stream_options.include_usage = true
body.stream_options = { ...(body.stream_options || {}), include_usage: true };
requestBodyParsed = { kind: 'chat_completions', body };
} else if (path === '/messages') {
const body: GatewayMessagesRequest = JSON.parse(requestBodyText);
requestBodyParsed = { kind: 'messages', body };
} else {
const body: GatewayResponsesRequest = JSON.parse(requestBodyText);
body.store = false;
Expand Down Expand Up @@ -236,13 +245,13 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
}

if (
requestBodyParsed.kind === 'responses' &&
['messages', 'responses'].includes(requestBodyParsed.kind) &&
!isKiloAffiliatedUser(maybeUser, organizationId ?? null)
) {
return NextResponse.json(
{
error: {
message: 'The Responses API is experimental and not yet available to all users.',
message: `The ${requestBodyParsed.kind} API is experimental and not yet available to all users.`,
},
},
{ status: 403 }
Expand Down Expand Up @@ -309,7 +318,9 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
const promptInfo: PromptInfo =
requestBodyParsed.kind === 'chat_completions'
? extractPromptInfo(requestBodyParsed.body)
: extractResponsesPromptInfo(requestBodyParsed.body);
: requestBodyParsed.kind === 'messages'
? extractMessagesPromptInfo(requestBodyParsed.body)
: extractResponsesPromptInfo(requestBodyParsed.body);

const usageContext: MicrodollarUsageContext = {
api_kind: requestBodyParsed.kind,
Expand Down Expand Up @@ -387,14 +398,23 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
return dataCollectionRequiredResponse();
}

if (taskId) {
requestBodyParsed.body.prompt_cache_key = generateProviderSpecificHash(
user.id + taskId,
provider
);
const userId = generateProviderSpecificHash(user.id, provider);
if (requestBodyParsed.kind === 'messages') {
requestBodyParsed.body.metadata = { user_id: userId };
requestBodyParsed.body.user = userId;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Non-standard Messages fields are sent to Anthropic-compatible backends

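GatewayMessagesRequest is widened with `user` and `session_id`, but Anthropic's Messages request schema defines neither field (its only per-user identifier is `metadata.user_id`, which this branch already sets). Because getProvider() can still route /messages through Vercel/BYOK, this branch forwards those extra keys to Anthropic-compatible /messages endpoints, which may reject them as unknown parameters even when the model otherwise supports the Messages API. Consider setting `user` and `session_id` only when the resolved provider is known to accept them, or stripping them before forwarding to any other backend.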

if (taskId) {
requestBodyParsed.body.session_id = generateProviderSpecificHash(user.id + taskId, provider);
}
} else {
if (taskId) {
requestBodyParsed.body.prompt_cache_key = generateProviderSpecificHash(
user.id + taskId,
provider
);
}
requestBodyParsed.body.safety_identifier = userId;
requestBodyParsed.body.user = userId; // deprecated, but this is what OpenRouter uses
}
requestBodyParsed.body.safety_identifier = generateProviderSpecificHash(user.id, provider);
requestBodyParsed.body.user = requestBodyParsed.body.safety_identifier; // deprecated, but this is what OpenRouter uses

if (requestBodyParsed.kind === 'chat_completions') {
if (ENABLE_TOOL_REPAIR) {
Expand Down Expand Up @@ -422,9 +442,11 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno

let response: Response;
if (customLlm) {
if (requestBodyParsed.kind === 'responses') {
if (requestBodyParsed.kind === 'responses' || requestBodyParsed.kind === 'messages') {
return NextResponse.json(
{ error: 'This model is not yet available on the Responses API' },
{
error: `This model is not available on the ${requestBodyParsed.kind} API`,
},
{ status: 404 }
);
}
Expand Down Expand Up @@ -548,9 +570,13 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
(isKiloFreeModel(originalModelIdLowerCased) ||
isActiveReviewPromo(botId, originalModelIdLowerCased))
) {
return requestBodyParsed.kind === 'chat_completions'
? rewriteFreeModelResponse_ChatCompletions(response, originalModelIdLowerCased)
: rewriteFreeModelResponse_Responses(response, originalModelIdLowerCased);
if (requestBodyParsed.kind === 'chat_completions') {
return rewriteFreeModelResponse_ChatCompletions(response, originalModelIdLowerCased);
}
if (requestBodyParsed.kind === 'responses') {
return rewriteFreeModelResponse_Responses(response, originalModelIdLowerCased);
}
// messages kind: pass through as-is (free models don't currently use the Messages API)
}

return wrapInSafeNextResponse(response);
Expand Down
26 changes: 25 additions & 1 deletion src/lib/abuse-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import 'server-only';
import { getMaxTokens, hasMiddleOutTransform } from '@/lib/providers/openrouter/request-helpers';

/**
* Extract full prompts from a GatewayRequest (chat completions or responses API).
* Extract full prompts from a GatewayRequest (chat completions, responses, or messages API).
* Unlike extractPromptInfo (which truncates to 100 chars), this returns full content for abuse analysis.
*/
function extractFullPrompts(request: GatewayRequest): {
Expand All @@ -31,6 +31,30 @@ function extractFullPrompts(request: GatewayRequest): {
if (request.kind === 'responses') {
return extractFullPromptsFromResponses(request.body);
}
if (request.kind === 'messages') {
const systemContent = request.body.system;
const systemPrompt =
typeof systemContent === 'string'
? systemContent
: Array.isArray(systemContent)
? systemContent.map(b => b.text).join('\n')
: null;
const lastUserMessage = request.body.messages.filter(m => m.role === 'user').at(-1);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Tool-result turns erase the user prompt for abuse checks

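Anthropic tool loops send tool results as a user message. When that is the latest user turn, this code selects it, filters out its non-text blocks (the `tool_result` blocks), and ends up with an empty string that `|| null` turns into null, even though an earlier user text prompt is still in the history. That leaves the abuse classifier blind on follow-up tool calls. Consider walking backwards through the user messages and using the most recent one that has string content or at least one text block.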

let userPrompt: string | null = null;
if (lastUserMessage) {
const content = lastUserMessage.content;
if (typeof content === 'string') {
userPrompt = content;
} else if (Array.isArray(content)) {
userPrompt =
content
.filter(c => c.type === 'text')
.map(c => ('text' in c ? c.text : ''))
.join('\n') || null;
}
}
return { systemPrompt: systemPrompt || null, userPrompt };
}
return extractFullPromptsFromChatCompletions(request.body);
}

Expand Down
19 changes: 15 additions & 4 deletions src/lib/kilo-auto-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,15 +214,26 @@ export function applyResolvedAutoModel(
) {
const resolved = resolveAutoModel(model, modeHeader);
request.body.model = resolved.model;
if (resolved.reasoning) request.body.reasoning = resolved.reasoning;
if (resolved.verbosity) {
if (request.kind === 'chat_completions') {
request.body.verbosity = resolved.verbosity as OpenRouterChatCompletionRequest['verbosity'];
if (resolved.reasoning) {
if (request.kind === 'messages') {
request.body.thinking = { type: resolved.reasoning.enabled ? 'adaptive' : 'disabled' };
} else {
request.body.reasoning = resolved.reasoning;
}
}
if (resolved.verbosity) {
if (request.kind === 'messages') {
request.body.output_config = {
...request.body.output_config,
effort: resolved.verbosity,
};
} else if (request.kind === 'responses') {
request.body.text = {
...request.body.text,
verbosity: resolved.verbosity as OpenAI.Responses.ResponseTextConfig['verbosity'],
};
} else {
request.body.verbosity = resolved.verbosity as OpenRouterChatCompletionRequest['verbosity'];
}
}
}
22 changes: 22 additions & 0 deletions src/lib/o11y/api-metrics.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ export function getToolsAvailable(request: GatewayRequest): string[] {
});
}

if (request.kind === 'messages') {
return request.body.tools.map((tool): string => {
const name = typeof tool.name === 'string' ? tool.name.trim() : '';
return name ? `function:${name}` : 'function:unknown';
});
}

return request.body.tools.map((tool): string => {
if (tool.type === 'function') {
const toolName = typeof tool.function?.name === 'string' ? tool.function.name.trim() : '';
Expand Down Expand Up @@ -116,6 +123,21 @@ export function getToolsUsed(request: GatewayRequest): string[] {
return used;
}

if (request.kind === 'messages') {
const used = new Array<string>();
for (const message of request.body.messages) {
if (message.role !== 'assistant') continue;
const content = Array.isArray(message.content) ? message.content : [];
for (const block of content) {
if (block.type === 'tool_use') {
const name = typeof block.name === 'string' ? block.name.trim() : '';
used.push(name ? `function:${name}` : 'function:unknown');
}
}
}
return used;
}

if (!Array.isArray(request.body.messages)) return [];

const used = new Array<string>();
Expand Down
Loading
Loading