// Mirror of https://gitea.toothfairyai.com/ToothFairyAI/tf_code.git
// Synced 2026-03-30 05:43:55 +00:00 · 424 lines · 14 KiB · TypeScript
import { describe, expect, test } from "bun:test"
import path from "path"
import { SessionCompaction } from "../../src/session/compaction"
import { Token } from "../../src/util/token"
import { Instance } from "../../src/project/instance"
import { Log } from "../../src/util/log"
import { tmpdir } from "../fixture/fixture"
import { Session } from "../../src/session"
import type { Provider } from "../../src/provider/provider"
// Initialise the project logger once for this test file with `print: false`
// (presumably to keep log output off the test console — confirm against Log.init).
Log.init({ print: false })
/**
 * Builds a minimal model fixture for the tests in this file.
 *
 * Only the limits, cost table and provider package are configurable; every
 * other field is a fixed placeholder. The literal is asserted to
 * `Provider.Model` rather than annotated, so it is not checked for
 * completeness against the real model type.
 *
 * @param opts.context Value stored in `limit.context`.
 * @param opts.output Value stored in `limit.output`.
 * @param opts.input Optional value stored in `limit.input`; left `undefined` when omitted.
 * @param opts.cost Optional cost table; defaults to all zeroes.
 * @param opts.npm Optional provider package stored in `api.npm`; defaults to `"@ai-sdk/anthropic"`.
 * @returns The fixture object typed as `Provider.Model`.
 */
function createModel(opts: {
  context: number
  output: number
  input?: number
  cost?: Provider.Model["cost"]
  npm?: string
}): Provider.Model {
  return {
    id: "test-model",
    providerID: "test",
    name: "Test",
    limit: {
      context: opts.context,
      input: opts.input,
      output: opts.output,
    },
    cost: opts.cost ?? { input: 0, output: 0, cache: { read: 0, write: 0 } },
    capabilities: {
      toolcall: true,
      attachment: false,
      reasoning: false,
      temperature: true,
      input: { text: true, image: false, audio: false, video: false },
      output: { text: true, image: false, audio: false, video: false },
    },
    api: { npm: opts.npm ?? "@ai-sdk/anthropic" },
    options: {},
  } as Provider.Model
}
// Exercises SessionCompaction.isOverflow against synthetic models built with
// createModel. Every case runs inside Instance.provide, rooted at a fresh
// temporary directory that `await using` disposes when the test ends.
describe("session.compaction.isOverflow", () => {
  test("returns true when token count exceeds usable context", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("returns false when token count within usable context", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 200_000, output: 32_000 })
        const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  test("includes cache.read in token count", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 60_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("respects input limit for input caps", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
        const tokens = { input: 271_000, output: 1_000, reasoning: 0, cache: { read: 2_000, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("returns false when input/output are within input caps", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
        const tokens = { input: 200_000, output: 20_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  test("returns false when output within limit with input caps", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 200_000, input: 120_000, output: 10_000 })
        const tokens = { input: 50_000, output: 9_999, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  // ─── Bug reproduction tests ───────────────────────────────────────────
  // These tests demonstrate that when limit.input is set, isOverflow()
  // does not subtract any headroom for the next model response. This means
  // compaction only triggers AFTER we've already consumed the full input
  // budget, leaving zero room for the next API call's output tokens.
  //
  // Compare: without limit.input, usable = context - output (reserves space).
  // With limit.input, usable = limit.input (reserves nothing).
  //
  // Related issues: #10634, #8089, #11086, #12621
  // Open PRs: #6875, #12924

  test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K
        const model = createModel({ context: 200_000, input: 200_000, output: 32_000 })

        // We've used 198K tokens total. Only 2K under the input limit.
        // On the next turn, the full conversation (198K) becomes input,
        // plus the model needs room to generate output — this WILL overflow.
        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
        // count = 180K + 3K + 15K = 198K
        // usable = limit.input = 200K (no output subtracted!)
        // 198K > 200K = false → no compaction triggered

        // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓
        // WITH limit.input: usable = 200K, and 198K > 200K = false ✗

        // With 198K used and only 2K headroom, the next turn will overflow.
        // Compaction MUST trigger here.
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Same model but without limit.input — uses context - output instead
        const model = createModel({ context: 200_000, output: 32_000 })

        // Same token usage as above
        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
        // count = 198K
        // usable = context - output = 200K - 32K = 168K
        // 198K > 168K = true → compaction correctly triggered

        const result = await SessionCompaction.isOverflow({ tokens, model })
        expect(result).toBe(true) // ← Correct: headroom is reserved
      },
    })
  })

  test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Two models with identical context/output limits, differing only in limit.input
        const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
        const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })

        // 181K total tokens (166K input + 5K cache.read + 10K output) —
        // well above context-output (168K) but below input limit (200K)
        const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }

        const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
        const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit })

        // Both models have identical real capacity — they should agree:
        expect(withLimit).toBe(true) // should compact (181K leaves no room for 32K output)
        expect(withoutLimit).toBe(true) // correctly compacts (181K > 168K)
      },
    })
  })

  test("returns false when model context limit is 0", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 0, output: 32_000 })
        const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  test("returns false when compaction.auto is disabled", async () => {
    // Seed the temporary directory with an opencode.json that sets
    // compaction.auto to false before the instance is provided.
    await using tmp = await tmpdir({
      init: async (dir) => {
        await Bun.write(
          path.join(dir, "opencode.json"),
          JSON.stringify({
            compaction: { auto: false },
          }),
        )
      },
    })
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Same model and usage as the first test in this suite, which expects true.
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })
})
// Pins Token.estimate to the ratio these cases expect: one token per four
// characters of input, and zero tokens for an empty string.
describe("util.token.estimate", () => {
  test("estimates tokens from text (4 chars per token)", () => {
    const text = "x".repeat(4000)
    expect(Token.estimate(text)).toBe(1000)
  })

  test("estimates tokens from larger text", () => {
    const text = "y".repeat(20_000)
    expect(Token.estimate(text)).toBe(5000)
  })

  test("returns 0 for empty string", () => {
    expect(Token.estimate("")).toBe(0)
  })
})
// Checks how Session.getUsage turns a provider usage report (plus optional
// provider metadata) into the `tokens` breakdown and `cost` it returns.
// Models come from createModel, whose default `api.npm` is "@ai-sdk/anthropic".
describe("session.getUsage", () => {
  test("normalizes standard usage to token format", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
      },
    })

    expect(result.tokens.input).toBe(1000)
    expect(result.tokens.output).toBe(500)
    expect(result.tokens.reasoning).toBe(0)
    expect(result.tokens.cache.read).toBe(0)
    expect(result.tokens.cache.write).toBe(0)
  })

  test("extracts cached tokens to cache.read", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        cachedInputTokens: 200,
      },
    })

    // No provider metadata is passed here: the 200 cached tokens are expected
    // to be moved out of `input` (1000 - 200) and into `cache.read`.
    expect(result.tokens.input).toBe(800)
    expect(result.tokens.cache.read).toBe(200)
  })

  test("handles anthropic cache write metadata", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
      },
      metadata: {
        anthropic: {
          cacheCreationInputTokens: 300,
        },
      },
    })

    expect(result.tokens.cache.write).toBe(300)
  })

  test("does not subtract cached tokens for anthropic provider", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        cachedInputTokens: 200,
      },
      metadata: {
        anthropic: {},
      },
    })

    // With an `anthropic` metadata entry present, `input` is expected to stay
    // at the reported 1000 while `cache.read` still receives the 200.
    expect(result.tokens.input).toBe(1000)
    expect(result.tokens.cache.read).toBe(200)
  })

  test("handles reasoning tokens", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        reasoningTokens: 100,
      },
    })

    expect(result.tokens.reasoning).toBe(100)
  })

  test("handles undefined optional values gracefully", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
      },
    })

    expect(result.tokens.input).toBe(0)
    expect(result.tokens.output).toBe(0)
    expect(result.tokens.reasoning).toBe(0)
    expect(result.tokens.cache.read).toBe(0)
    expect(result.tokens.cache.write).toBe(0)
    // Guards against a NaN cost when the optional usage fields are absent.
    expect(Number.isNaN(result.cost)).toBe(false)
  })

  test("calculates cost correctly", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
    })

    // 1M input tokens at rate 3 plus 100K output tokens at rate 15 is expected
    // to cost 3 + 1.5, which implies the rates are applied per million tokens.
    expect(result.cost).toBe(3 + 1.5)
  })

  test.each(["@ai-sdk/anthropic", "@ai-sdk/amazon-bedrock", "@ai-sdk/google-vertex/anthropic"])(
    "computes total from components for %s models",
    (npm) => {
      const model = createModel({ context: 100_000, output: 32_000, npm })
      const usage = {
        inputTokens: 1000,
        outputTokens: 500,
        // These providers typically report total as input + output only,
        // excluding cache read/write.
        totalTokens: 1500,
        cachedInputTokens: 200,
      }
      // Bedrock supplies the cache-write count under `bedrock.usage`, while the
      // other two packages supply it under `anthropic`; the expected
      // breakdown is the same on both paths.
      if (npm === "@ai-sdk/amazon-bedrock") {
        const result = Session.getUsage({
          model,
          usage,
          metadata: {
            bedrock: {
              usage: {
                cacheWriteInputTokens: 300,
              },
            },
          },
        })

        expect(result.tokens.input).toBe(1000)
        expect(result.tokens.cache.read).toBe(200)
        expect(result.tokens.cache.write).toBe(300)
        // 1000 input + 500 output + 200 cache.read + 300 cache.write
        expect(result.tokens.total).toBe(2000)
        return
      }

      const result = Session.getUsage({
        model,
        usage,
        metadata: {
          anthropic: {
            cacheCreationInputTokens: 300,
          },
        },
      })

      expect(result.tokens.input).toBe(1000)
      expect(result.tokens.cache.read).toBe(200)
      expect(result.tokens.cache.write).toBe(300)
      // 1000 input + 500 output + 200 cache.read + 300 cache.write
      expect(result.tokens.total).toBe(2000)
    },
  )
})