diff --git a/.changeset/four-rules-peel.md b/.changeset/four-rules-peel.md new file mode 100644 index 000000000..393d9307a --- /dev/null +++ b/.changeset/four-rules-peel.md @@ -0,0 +1,9 @@ +--- +"@voltagent/core": patch +--- + +fix: tighten prompt-context usage telemetry + +- redact nested large binary fields when estimating prompt context usage +- exclude runtime-only tool metadata from tool schema token estimates +- avoid emitting cached and reasoning token span attributes when their values are zero diff --git a/packages/core/src/agent/agent-observability.spec.ts b/packages/core/src/agent/agent-observability.spec.ts index 44ddb8582..fff3a67c7 100644 --- a/packages/core/src/agent/agent-observability.spec.ts +++ b/packages/core/src/agent/agent-observability.spec.ts @@ -212,6 +212,53 @@ describe("Agent with Observability", () => { unsubscribe(); }); + it("should not emit zero cached or reasoning usage on llm spans", async () => { + const events: any[] = []; + const unsubscribe = WebSocketEventEmitter.getInstance().onWebSocketEvent((event) => { + events.push(event); + }); + + mockModel.doGenerate = async () => ({ + finishReason: makeFinishReason("stop"), + usage: makeProviderUsage(10, 20), + content: [{ type: "text", text: "No extra usage" }], + warnings: [], + logprobs: undefined, + providerDetails: undefined, + }); + + const agent = new Agent({ + name: "usage-agent", + purpose: "Testing llm usage emission", + instructions: "You are a usage test agent", + model: mockModel as any, + observability, + }); + + const result = await agent.generateText("Track usage"); + + expect(result.text).toBe("No extra usage"); + + const endSpans = events + .filter((event) => event.type === "span:end") + .map((event) => event.span); + + const llmSpan = endSpans.find( + (span) => + span.attributes["span.type"] === "llm" && + span.attributes["llm.operation"] === "generateText", + ); + + expect(llmSpan).toBeDefined(); + expect(llmSpan.attributes["llm.usage.prompt_tokens"]).toBe(10); + 
expect(llmSpan.attributes["llm.usage.completion_tokens"]).toBe(20); + expect(llmSpan.attributes["llm.usage.total_tokens"]).toBe(30); + expect(llmSpan.attributes["llm.usage.cached_tokens"]).toBeUndefined(); + expect(llmSpan.attributes["llm.usage.reasoning_tokens"]).toBeUndefined(); + + unsubscribe(); + }); + it("should preserve root span provider cost when post-processing fails after a successful model call", async () => { const events: any[] = []; const unsubscribe = WebSocketEventEmitter.getInstance().onWebSocketEvent((event) => { events.push(event); }); diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index abd1c1c61..511916d17 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -4265,10 +4265,10 @@ export class Agent { if (totalTokens !== undefined) { span.setAttribute("llm.usage.total_tokens", totalTokens); } - if (cachedInputTokens !== undefined) { + if (cachedInputTokens !== undefined && cachedInputTokens > 0) { span.setAttribute("llm.usage.cached_tokens", cachedInputTokens); } - if (reasoningTokens !== undefined) { + if (reasoningTokens !== undefined && reasoningTokens > 0) { span.setAttribute("llm.usage.reasoning_tokens", reasoningTokens); } } diff --git a/packages/core/src/agent/prompt-context-usage.spec.ts b/packages/core/src/agent/prompt-context-usage.spec.ts index 61111b3a0..75ccd7809 100644 --- a/packages/core/src/agent/prompt-context-usage.spec.ts +++ b/packages/core/src/agent/prompt-context-usage.spec.ts @@ -68,4 +68,86 @@ describe("prompt context usage estimation", () => { "usage.prompt_context.tool_count": 2, }); }); + + it("sanitizes nested binary args recursively and ignores provider-only metadata", () => { + const circularArgsA: Record<string, unknown> = { + content: { + metadata: { + data: "x".repeat(8_000), + }, + }, + attachments: [{ image: "y".repeat(8_000) }], + }; + circularArgsA.self = circularArgsA; + + const circularArgsB: Record<string, unknown> = { + content: { + metadata: { + data: "short", + }, + }, + attachments: [{ 
image: "tiny" }], + }; + circularArgsB.self = circularArgsB; + + const toolAEstimate = estimatePromptContextUsage({ + tools: { + searchDocs: { + description: "Search the documentation", + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }), + providerOptions: { + openai: { + metadata: "provider-only".repeat(2_000), + }, + }, + needsApproval: true, + args: circularArgsA, + }, + }, + }); + + const toolBEstimate = estimatePromptContextUsage({ + tools: { + searchDocs: { + description: "Search the documentation", + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }), + providerOptions: { + openai: { + metadata: "ignored", + }, + }, + needsApproval: false, + args: circularArgsB, + }, + }, + }); + + expect(toolAEstimate?.toolTokensEstimated).toBeGreaterThan(0); + expect(toolAEstimate?.toolTokensEstimated).toBe(toolBEstimate?.toolTokensEstimated); + }); + + it("ignores non-plain args values when estimating tool tokens", () => { + const withArrayArgs = estimatePromptContextUsage({ + tools: { + searchDocs: { + description: "Search the documentation", + inputSchema: z.object({ query: z.string() }), + args: ["x".repeat(10_000)], + }, + }, + }); + + const withoutArgs = estimatePromptContextUsage({ + tools: { + searchDocs: { + description: "Search the documentation", + inputSchema: z.object({ query: z.string() }), + }, + }, + }); + + expect(withArrayArgs?.toolTokensEstimated).toBe(withoutArgs?.toolTokensEstimated); + }); }); diff --git a/packages/core/src/agent/prompt-context-usage.ts b/packages/core/src/agent/prompt-context-usage.ts index b252704f3..bee9f5006 100644 --- a/packages/core/src/agent/prompt-context-usage.ts +++ b/packages/core/src/agent/prompt-context-usage.ts @@ -12,6 +12,7 @@ const BINARY_PART_TYPES = new Set([ "media", ]); const LARGE_BINARY_KEYS = new Set(["audio", "base64", "bytes", "data", "image"]); +const CIRCULAR_REFERENCE_PLACEHOLDER = "[circular]"; type PromptMessage = { 
role?: string; @@ -164,12 +165,25 @@ function serializePromptValue(value: unknown): string { } function sanitizeRecord(record: Record<string, unknown>): Record<string, unknown> { + return sanitizeRecordValue(record, new Set()); +} + +function sanitizeRecordValue( + record: Record<string, unknown>, + seen: Set<object>, +): Record<string, unknown> { + if (seen.has(record)) { + return { circular: CIRCULAR_REFERENCE_PLACEHOLDER }; + } + + seen.add(record); const sanitized: Record<string, unknown> = {}; for (const [key, value] of Object.entries(record)) { - sanitized[key] = LARGE_BINARY_KEYS.has(key) ? "[omitted]" : value; + sanitized[key] = LARGE_BINARY_KEYS.has(key) ? "[omitted]" : sanitizeValue(value, seen); } + seen.delete(record); return sanitized; } @@ -200,9 +214,7 @@ function serializeToolDefinition(name: string, tool: unknown): Record<string, unknown>) } : {}), - ...(candidate.needsApproval !== undefined ? { needsApproval: candidate.needsApproval } : {}), + ...(isPlainObject(candidate.args) ? { args: sanitizeRecord(candidate.args) } : {}), }; } @@ -221,3 +233,43 @@ function normalizeSchema(schema: unknown): unknown { return schema; } + +function sanitizeValue(value: unknown, seen: Set<object>): unknown { + if (value === null || value === undefined) { + return value; + } + + if (typeof value !== "object") { + return value; + } + + if (value instanceof Date || value instanceof RegExp) { + return value; + } + + if (Array.isArray(value)) { + if (seen.has(value)) { + return [CIRCULAR_REFERENCE_PLACEHOLDER]; + } + + seen.add(value); + const sanitized = value.map((entry) => sanitizeValue(entry, seen)); + seen.delete(value); + return sanitized; + } + + if (!isPlainObject(value)) { + return value; + } + + return sanitizeRecordValue(value, seen); +} + +function isPlainObject(value: unknown): value is Record<string, unknown> { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return false; + } + + const prototype = Object.getPrototypeOf(value); + return prototype === Object.prototype || prototype === null; +}