fix: make read tool more mem efficient (#14009)

This commit is contained in:
Aiden Cline
2026-02-17 15:36:45 -06:00
committed by GitHub
parent 2a2437bf22
commit c1b03b728a
2 changed files with 142 additions and 34 deletions

View File

@@ -1,6 +1,7 @@
import z from "zod"
import * as fs from "fs"
import * as path from "path"
import { createInterface } from "readline"
import { Tool } from "./tool"
import { LSP } from "../lsp"
import { FileTime } from "../file/time"
@@ -11,7 +12,9 @@ import { InstructionPrompt } from "../session/instruction"
const DEFAULT_READ_LIMIT = 2000
const MAX_LINE_LENGTH = 2000
const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
const MAX_BYTES = 50 * 1024
const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`
export const ReadTool = Tool.define("read", {
description: DESCRIPTION,
@@ -134,27 +137,53 @@ export const ReadTool = Tool.define("read", {
}
}
const isBinary = await isBinaryFile(filepath, file)
const isBinary = await isBinaryFile(filepath, stat.size)
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
const stream = fs.createReadStream(filepath, { encoding: "utf8" })
const rl = createInterface({
input: stream,
// Note: we use the crlfDelay option to recognize all instances of CR LF
// ('\r\n') in file as a single line break.
crlfDelay: Infinity,
})
const limit = params.limit ?? DEFAULT_READ_LIMIT
const offset = params.offset ?? 1
const start = offset - 1
const lines = await file.text().then((text) => text.split("\n"))
if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`)
const raw: string[] = []
let bytes = 0
let lines = 0
let truncatedByBytes = false
for (let i = start; i < Math.min(lines.length, start + limit); i++) {
const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
if (bytes + size > MAX_BYTES) {
truncatedByBytes = true
break
let hasMoreLines = false
try {
for await (const text of rl) {
lines += 1
if (lines <= start) continue
if (raw.length >= limit) {
hasMoreLines = true
continue
}
const line = text.length > MAX_LINE_LENGTH ? text.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : text
const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
if (bytes + size > MAX_BYTES) {
truncatedByBytes = true
hasMoreLines = true
break
}
raw.push(line)
bytes += size
}
raw.push(line)
bytes += size
} finally {
rl.close()
stream.destroy()
}
if (lines < offset && !(lines === 0 && offset === 1)) {
throw new Error(`Offset ${offset} is out of range for this file (${lines} lines)`)
}
const content = raw.map((line, index) => {
@@ -165,15 +194,15 @@ export const ReadTool = Tool.define("read", {
let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>"].join("\n")
output += content.join("\n")
const totalLines = lines.length
const totalLines = lines
const lastReadLine = offset + raw.length - 1
const hasMoreLines = totalLines > lastReadLine
const nextOffset = lastReadLine + 1
const truncated = hasMoreLines || truncatedByBytes
if (truncatedByBytes) {
output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${offset}-${lastReadLine}. Use offset=${nextOffset} to continue.)`
} else if (hasMoreLines) {
output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
output += `\n\n(Showing lines ${offset}-${lastReadLine} of ${totalLines}. Use offset=${nextOffset} to continue.)`
} else {
output += `\n\n(End of file - total ${totalLines} lines)`
}
@@ -199,7 +228,7 @@ export const ReadTool = Tool.define("read", {
},
})
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
async function isBinaryFile(filepath: string, fileSize: number): Promise<boolean> {
const ext = path.extname(filepath).toLowerCase()
// binary check for common non-text extensions
switch (ext) {
@@ -236,22 +265,25 @@ async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolea
break
}
const stat = await file.stat()
const fileSize = stat.size
if (fileSize === 0) return false
const bufferSize = Math.min(4096, fileSize)
const buffer = await file.arrayBuffer()
if (buffer.byteLength === 0) return false
const bytes = new Uint8Array(buffer.slice(0, bufferSize))
const fh = await fs.promises.open(filepath, "r")
try {
const sampleSize = Math.min(4096, fileSize)
const bytes = Buffer.alloc(sampleSize)
const result = await fh.read(bytes, 0, sampleSize, 0)
if (result.bytesRead === 0) return false
let nonPrintableCount = 0
for (let i = 0; i < bytes.length; i++) {
if (bytes[i] === 0) return true
if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
nonPrintableCount++
let nonPrintableCount = 0
for (let i = 0; i < result.bytesRead; i++) {
if (bytes[i] === 0) return true
if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
nonPrintableCount++
}
}
// If >30% non-printable characters, consider it binary
return nonPrintableCount / result.bytesRead > 0.3
} finally {
await fh.close()
}
// If >30% non-printable characters, consider it binary
return nonPrintableCount / bytes.length > 0.3
}