fix: make read tool more mem efficient (#14009)

2026-04-04 08:03:14 +00:00 · 2026-02-17 15:36:45 -06:00
parent 2a2437bf22
commit c1b03b728a
2 changed files with 142 additions and 34 deletions
--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@@ -1,6 +1,7 @@
 import z from "zod"
 import * as fs from "fs"
 import * as path from "path"
+import { createInterface } from "readline"
 import { Tool } from "./tool"
 import { LSP } from "../lsp"
 import { FileTime } from "../file/time"
@@ -11,7 +12,9 @@ import { InstructionPrompt } from "../session/instruction"

 const DEFAULT_READ_LIMIT = 2000
 const MAX_LINE_LENGTH = 2000
+const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)`
 const MAX_BYTES = 50 * 1024
+const MAX_BYTES_LABEL = `${MAX_BYTES / 1024} KB`

 export const ReadTool = Tool.define("read", {
  description: DESCRIPTION,
@@ -134,27 +137,53 @@ export const ReadTool = Tool.define("read", {
      }
    }

-    const isBinary = await isBinaryFile(filepath, file)
+    const isBinary = await isBinaryFile(filepath, stat.size)
    if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)

+    const stream = fs.createReadStream(filepath, { encoding: "utf8" })
+    const rl = createInterface({
+      input: stream,
+      // Note: we use the crlfDelay option to recognize all instances of CR LF
+      // ('\r\n') in file as a single line break.
+      crlfDelay: Infinity,
+    })
+
    const limit = params.limit ?? DEFAULT_READ_LIMIT
    const offset = params.offset ?? 1
    const start = offset - 1
-    const lines = await file.text().then((text) => text.split("\n"))
-    if (start >= lines.length) throw new Error(`Offset ${offset} is out of range for this file (${lines.length} lines)`)
-
    const raw: string[] = []
    let bytes = 0
+    let lines = 0
    let truncatedByBytes = false
-    for (let i = start; i < Math.min(lines.length, start + limit); i++) {
-      const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i]
-      const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
-      if (bytes + size > MAX_BYTES) {
-        truncatedByBytes = true
-        break
+    let hasMoreLines = false
+    try {
+      for await (const text of rl) {
+        lines += 1
+        if (lines <= start) continue
+
+        if (raw.length >= limit) {
+          hasMoreLines = true
+          continue
+        }
+
+        const line = text.length > MAX_LINE_LENGTH ? text.substring(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : text
+        const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0)
+        if (bytes + size > MAX_BYTES) {
+          truncatedByBytes = true
+          hasMoreLines = true
+          break
+        }
+
+        raw.push(line)
+        bytes += size
      }
-      raw.push(line)
-      bytes += size
+    } finally {
+      rl.close()
+      stream.destroy()
+    }
+
+    if (lines < offset && !(lines === 0 && offset === 1)) {
+      throw new Error(`Offset ${offset} is out of range for this file (${lines} lines)`)
    }

    const content = raw.map((line, index) => {
@@ -165,15 +194,15 @@ export const ReadTool = Tool.define("read", {
    let output = [`<path>${filepath}</path>`, `<type>file</type>`, "<content>"].join("\n")
    output += content.join("\n")

-    const totalLines = lines.length
+    const totalLines = lines
    const lastReadLine = offset + raw.length - 1
-    const hasMoreLines = totalLines > lastReadLine
+    const nextOffset = lastReadLine + 1
    const truncated = hasMoreLines || truncatedByBytes

    if (truncatedByBytes) {
-      output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})`
+      output += `\n\n(Output capped at ${MAX_BYTES_LABEL}. Showing lines ${offset}-${lastReadLine}. Use offset=${nextOffset} to continue.)`
    } else if (hasMoreLines) {
-      output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})`
+      output += `\n\n(Showing lines ${offset}-${lastReadLine} of ${totalLines}. Use offset=${nextOffset} to continue.)`
    } else {
      output += `\n\n(End of file - total ${totalLines} lines)`
    }
@@ -199,7 +228,7 @@ export const ReadTool = Tool.define("read", {
  },
 })

-async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
+async function isBinaryFile(filepath: string, fileSize: number): Promise<boolean> {
  const ext = path.extname(filepath).toLowerCase()
  // binary check for common non-text extensions
  switch (ext) {
@@ -236,22 +265,25 @@ async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolea
      break
  }

-  const stat = await file.stat()
-  const fileSize = stat.size
  if (fileSize === 0) return false

-  const bufferSize = Math.min(4096, fileSize)
-  const buffer = await file.arrayBuffer()
-  if (buffer.byteLength === 0) return false
-  const bytes = new Uint8Array(buffer.slice(0, bufferSize))
+  const fh = await fs.promises.open(filepath, "r")
+  try {
+    const sampleSize = Math.min(4096, fileSize)
+    const bytes = Buffer.alloc(sampleSize)
+    const result = await fh.read(bytes, 0, sampleSize, 0)
+    if (result.bytesRead === 0) return false

-  let nonPrintableCount = 0
-  for (let i = 0; i < bytes.length; i++) {
-    if (bytes[i] === 0) return true
-    if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
-      nonPrintableCount++
+    let nonPrintableCount = 0
+    for (let i = 0; i < result.bytesRead; i++) {
+      if (bytes[i] === 0) return true
+      if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
+        nonPrintableCount++
+      }
    }
+    // If >30% non-printable characters, consider it binary
+    return nonPrintableCount / result.bytesRead > 0.3
+  } finally {
+    await fh.close()
  }
-  // If >30% non-printable characters, consider it binary
-  return nonPrintableCount / bytes.length > 0.3
 }