Zen: adjust cache tokens

This commit is contained in:
Frank 2026-03-23 20:33:11 -04:00
parent ad78b79b8a
commit a03a2b6eab
4 changed files with 17 additions and 6 deletions

View File

@@ -461,12 +461,17 @@ export async function handler(
...modelProvider, ...modelProvider,
...zenData.providers[modelProvider.id], ...zenData.providers[modelProvider.id],
...(() => { ...(() => {
const format = zenData.providers[modelProvider.id].format const providerProps = zenData.providers[modelProvider.id]
const format = providerProps.format
const providerModel = modelProvider.model const providerModel = modelProvider.model
if (format === "anthropic") return anthropicHelper({ reqModel, providerModel }) if (format === "anthropic") return anthropicHelper({ reqModel, providerModel })
if (format === "google") return googleHelper({ reqModel, providerModel }) if (format === "google") return googleHelper({ reqModel, providerModel })
if (format === "openai") return openaiHelper({ reqModel, providerModel }) if (format === "openai") return openaiHelper({ reqModel, providerModel })
return oaCompatHelper({ reqModel, providerModel }) return oaCompatHelper({
reqModel,
providerModel,
adjustCacheUsage: providerProps.adjustCacheUsage,
})
})(), })(),
} }
} }

View File

@@ -21,7 +21,7 @@ type Usage = {
} }
} }
export const oaCompatHelper: ProviderHelper = () => ({ export const oaCompatHelper: ProviderHelper = ({ adjustCacheUsage }) => ({
format: "oa-compat", format: "oa-compat",
modifyUrl: (providerApi: string) => providerApi + "/chat/completions", modifyUrl: (providerApi: string) => providerApi + "/chat/completions",
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => { modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
@@ -57,10 +57,15 @@ export const oaCompatHelper: ProviderHelper = () => ({
} }
}, },
normalizeUsage: (usage: Usage) => { normalizeUsage: (usage: Usage) => {
const inputTokens = usage.prompt_tokens ?? 0 let inputTokens = usage.prompt_tokens ?? 0
const outputTokens = usage.completion_tokens ?? 0 const outputTokens = usage.completion_tokens ?? 0
const reasoningTokens = usage.completion_tokens_details?.reasoning_tokens ?? undefined const reasoningTokens = usage.completion_tokens_details?.reasoning_tokens ?? undefined
const cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined let cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined
if (adjustCacheUsage && !cacheReadTokens) {
cacheReadTokens = Math.floor(inputTokens * 0.9)
}
return { return {
inputTokens: inputTokens - (cacheReadTokens ?? 0), inputTokens: inputTokens - (cacheReadTokens ?? 0),
outputTokens, outputTokens,

View File

@@ -33,7 +33,7 @@ export type UsageInfo = {
cacheWrite1hTokens?: number cacheWrite1hTokens?: number
} }
export type ProviderHelper = (input: { reqModel: string; providerModel: string }) => { export type ProviderHelper = (input: { reqModel: string; providerModel: string; adjustCacheUsage?: boolean }) => {
format: ZenData.Format format: ZenData.Format
modifyUrl: (providerApi: string, isStream?: boolean) => string modifyUrl: (providerApi: string, isStream?: boolean) => string
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => void modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => void

View File

@@ -48,6 +48,7 @@ export namespace ZenData {
headerMappings: z.record(z.string(), z.string()).optional(), headerMappings: z.record(z.string(), z.string()).optional(),
payloadModifier: z.record(z.string(), z.any()).optional(), payloadModifier: z.record(z.string(), z.any()).optional(),
payloadMappings: z.record(z.string(), z.string()).optional(), payloadMappings: z.record(z.string(), z.string()).optional(),
adjustCacheUsage: z.boolean().optional(),
}) })
const ModelsSchema = z.object({ const ModelsSchema = z.object({