From a03a2b6eab82725ab380547c08847f016bbf1d8a Mon Sep 17 00:00:00 2001 From: Frank Date: Mon, 23 Mar 2026 20:33:11 -0400 Subject: [PATCH] Zen: adjust cache tokens --- packages/console/app/src/routes/zen/util/handler.ts | 9 +++++++-- .../src/routes/zen/util/provider/openai-compatible.ts | 11 ++++++++--- .../app/src/routes/zen/util/provider/provider.ts | 2 +- packages/console/core/src/model.ts | 1 + 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/packages/console/app/src/routes/zen/util/handler.ts b/packages/console/app/src/routes/zen/util/handler.ts index 812e7c8d1..9dbadf1ee 100644 --- a/packages/console/app/src/routes/zen/util/handler.ts +++ b/packages/console/app/src/routes/zen/util/handler.ts @@ -461,12 +461,17 @@ export async function handler( ...modelProvider, ...zenData.providers[modelProvider.id], ...(() => { - const format = zenData.providers[modelProvider.id].format + const providerProps = zenData.providers[modelProvider.id] + const format = providerProps.format const providerModel = modelProvider.model if (format === "anthropic") return anthropicHelper({ reqModel, providerModel }) if (format === "google") return googleHelper({ reqModel, providerModel }) if (format === "openai") return openaiHelper({ reqModel, providerModel }) - return oaCompatHelper({ reqModel, providerModel }) + return oaCompatHelper({ + reqModel, + providerModel, + adjustCacheUsage: providerProps.adjustCacheUsage, + }) })(), } } diff --git a/packages/console/app/src/routes/zen/util/provider/openai-compatible.ts b/packages/console/app/src/routes/zen/util/provider/openai-compatible.ts index ce97a34d9..6cb4b6a75 100644 --- a/packages/console/app/src/routes/zen/util/provider/openai-compatible.ts +++ b/packages/console/app/src/routes/zen/util/provider/openai-compatible.ts @@ -21,7 +21,7 @@ type Usage = { } } -export const oaCompatHelper: ProviderHelper = () => ({ +export const oaCompatHelper: ProviderHelper = ({ adjustCacheUsage }) => ({ format: "oa-compat", modifyUrl: (providerApi: string) => providerApi + "/chat/completions", modifyHeaders: (headers: Headers, body: Record, apiKey: string) => { @@ -57,10 +57,15 @@ export const oaCompatHelper: ProviderHelper = () => ({ } }, normalizeUsage: (usage: Usage) => { - const inputTokens = usage.prompt_tokens ?? 0 + let inputTokens = usage.prompt_tokens ?? 0 const outputTokens = usage.completion_tokens ?? 0 const reasoningTokens = usage.completion_tokens_details?.reasoning_tokens ?? undefined - const cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined + let cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined + + if (adjustCacheUsage && !cacheReadTokens) { + cacheReadTokens = Math.floor(inputTokens * 0.9) + } + return { inputTokens: inputTokens - (cacheReadTokens ?? 0), outputTokens, diff --git a/packages/console/app/src/routes/zen/util/provider/provider.ts b/packages/console/app/src/routes/zen/util/provider/provider.ts index 3afabdae9..64444ec9e 100644 --- a/packages/console/app/src/routes/zen/util/provider/provider.ts +++ b/packages/console/app/src/routes/zen/util/provider/provider.ts @@ -33,7 +33,7 @@ export type UsageInfo = { cacheWrite1hTokens?: number } -export type ProviderHelper = (input: { reqModel: string; providerModel: string }) => { +export type ProviderHelper = (input: { reqModel: string; providerModel: string; adjustCacheUsage?: boolean }) => { format: ZenData.Format modifyUrl: (providerApi: string, isStream?: boolean) => string modifyHeaders: (headers: Headers, body: Record, apiKey: string) => void diff --git a/packages/console/core/src/model.ts b/packages/console/core/src/model.ts index c47e4a6d8..6f28dd798 100644 --- a/packages/console/core/src/model.ts +++ b/packages/console/core/src/model.ts @@ -48,6 +48,7 @@ export namespace ZenData { headerMappings: z.record(z.string(), z.string()).optional(), payloadModifier: z.record(z.string(), z.any()).optional(), payloadMappings: z.record(z.string(), z.string()).optional(), + adjustCacheUsage: z.boolean().optional(), }) const ModelsSchema = z.object({