From 16742d2cc58e615f1a0f5094aa051da903f90470 Mon Sep 17 00:00:00 2001 From: hanrui Date: Mon, 22 Jun 2026 16:34:28 +0800 Subject: [PATCH] perf(grep): stream files_with_matches and stop at head_limit Delegate most-recently-modified-first ordering to ripgrep via --sortr modified, then read records incrementally and kill the subprocess as soon as head_limit non-sensitive matches are collected. This removes the per-file stat pass from JS and avoids a full tree scan for broad patterns. content and count_matches keep the buffered path. The files_with_matches truncation notice no longer reports an exact total, since the search stops early. --- .changeset/speed-up-grep.md | 5 + .../agent-core/src/tools/builtin/file/grep.ts | 398 ++++++++++++++---- packages/agent-core/test/tools/grep.test.ts | 187 ++++---- 3 files changed, 395 insertions(+), 195 deletions(-) create mode 100644 .changeset/speed-up-grep.md diff --git a/.changeset/speed-up-grep.md b/.changeset/speed-up-grep.md new file mode 100644 index 000000000..7c9c02376 --- /dev/null +++ b/.changeset/speed-up-grep.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": patch +--- + +Speed up file-content search for broad patterns in large repositories. diff --git a/packages/agent-core/src/tools/builtin/file/grep.ts b/packages/agent-core/src/tools/builtin/file/grep.ts index 00dbcf25f..0450ef74b 100644 --- a/packages/agent-core/src/tools/builtin/file/grep.ts +++ b/packages/agent-core/src/tools/builtin/file/grep.ts @@ -13,8 +13,16 @@ * - stdout/stderr are capped while streams continue draining; * - hidden files are searched, but VCS metadata and common sensitive glob * patterns are prefiltered where possible; - * - parsed path records are filtered again after rg returns, using the active - * backend path class. + * - parsed path records are filtered against the sensitive-file rules using + * the active backend path class. + * + * `files_with_matches` (the default) takes a streaming fast path: ripgrep is + * run with `--sortr modified` so it emits matches most-recently-modified + * first, and we stop reading (and kill the subprocess) as soon as we have + * `head_limit` non-sensitive matches. This avoids stat-ing every match from + * JS (a cross-channel round-trip per file on remote Kaos backends) and avoids + * a full tree scan for broad patterns, at the cost of not reporting an exact + * total match count. `content` and `count_matches` keep the buffered path. */ import type { Readable } from 'node:stream'; @@ -137,7 +145,6 @@ const MAX_OUTPUT_BYTES = 10 * 1024 * 1024; // matching lines in full so the cap is intentionally skipped there. const RG_MAX_COLUMNS = 500; const DEFAULT_HEAD_LIMIT = 250; -const MTIME_STAT_CONCURRENCY = 32; const VCS_DIRECTORIES_TO_EXCLUDE = ['.git', '.svn', '.hg', '.bzr', '.jj', '.sl'] as const; // This is a conservative prefilter. The authoritative sensitive-file check // still happens on parsed rg records after execution. @@ -215,6 +222,11 @@ export class GrepTool implements BuiltinTool { return { isError: true, output: rgUnavailableMessage(error) }; } + const mode = args.output_mode ?? 'files_with_matches'; + if (mode === 'files_with_matches') { + return this.executeFilesWithMatches(args, signal, searchPaths, rgPath, pathClass); + } + let runResult = await runRipgrepOnce(this.kaos, buildRgArgs(rgPath, args, searchPaths), signal); if (runResult.kind === 'tool-error') return runResult.result; if (shouldRetryRipgrepEagain(runResult)) { @@ -238,7 +250,6 @@ export class GrepTool implements BuiltinTool { }; } - const mode = args.output_mode ?? 'files_with_matches'; if (bufferTruncated || timedOut) { stdoutText = omitIncompleteTrailingRecord(stdoutText, mode); } @@ -255,19 +266,7 @@ export class GrepTool implements BuiltinTool { const rawLines = parseRipgrepOutput(stdoutText, mode); const filteredSensitive = new Set(); - const keptLines = filterSensitiveLines(rawLines, mode, filteredSensitive, pathClass); - let orderedLines: ParsedGrepLine[]; - try { - orderedLines = - mode === 'files_with_matches' && !timedOut - ? await sortFilesWithMatchesByMtime(keptLines, this.kaos, signal) - : keptLines; - } catch (error) { - if (error instanceof GrepAbortedError) { - return { isError: true, output: 'Grep aborted' }; - } - throw error; - } + const orderedLines = filterSensitiveLines(rawLines, mode, filteredSensitive, pathClass); const offset = args.offset ?? 0; const headLimit = args.head_limit ?? DEFAULT_HEAD_LIMIT; @@ -346,6 +345,89 @@ export class GrepTool implements BuiltinTool { return builder.ok(sideChannelMessages.join('\n')); } + private async executeFilesWithMatches( + args: GrepInput, + signal: AbortSignal, + searchPaths: string[], + rgPath: string, + pathClass: PathClass, + ): Promise { + const offset = args.offset ?? 0; + const headLimit = args.head_limit ?? DEFAULT_HEAD_LIMIT; + const streamResult = await runRipgrepStreaming( + this.kaos, + buildRgArgs(rgPath, args, searchPaths), + signal, + { + offset, + limit: headLimit > 0 ? headLimit : undefined, + pathClass, + }, + ); + if (streamResult.kind === 'tool-error') return streamResult.result; + + const { records, filteredSensitive, hasMore, timedOut } = streamResult; + + if (timedOut && records.length === 0) { + return { + isError: true, + output: `Grep timed out after ${String(DEFAULT_TIMEOUT_MS / 1000)}s. Try a more specific path or pattern.`, + }; + } + if (signal.aborted) { + return { isError: true, output: 'Grep aborted' }; + } + + // Records are already sensitive-filtered, offset-skipped, capped at the + // limit, and emitted by ripgrep in most-recently-modified-first order. + const messages: string[] = []; + if (filteredSensitive.size > 0) { + const displayedFilteredPaths = [...filteredSensitive].map((path) => + relativizeIfUnder(path, this.workspace.workspaceDir, pathClass), + ); + messages.push( + `Filtered ${String(filteredSensitive.size)} sensitive file(s): ${displayedFilteredPaths.join(', ')}`, + ); + } + if (hasMore) { + // We stopped reading after the limit, so more matches may exist. The + // exact total is intentionally not computed (that would require a full + // scan, defeating the early-stop); point the user at pagination instead. + const nextOffset = offset + records.length; + messages.push( + `Results truncated to ${String(records.length)} lines (more available). Use offset=${String(nextOffset)} to see more.`, + ); + } + if (timedOut) { + messages.push( + `Grep timed out after ${String(DEFAULT_TIMEOUT_MS / 1000)}s; partial results returned`, + ); + } + + const displayedLines = records.map((line) => + formatDisplayLine(line, 'files_with_matches', this.workspace.workspaceDir, pathClass, false), + ); + const contentBody = displayedLines.join('\n'); + const visibleBody = + records.length === 0 && filteredSensitive.size > 0 + ? 'No non-sensitive matches found' + : contentBody; + const emptyResultMessage = + SENSITIVE_GLOBS_TO_EXCLUDE.length > 0 ? 'No non-sensitive matches found' : 'No matches found'; + const combined = + visibleBody === '' && messages.length === 0 + ? emptyResultMessage + : messages.length > 0 + ? visibleBody === '' + ? messages.join('\n') + : `${visibleBody}\n${messages.join('\n')}` + : visibleBody; + + const builder = new ToolResultBuilder(); + builder.write(combined); + return builder.ok(''); + } + } interface RipgrepRunResult { @@ -378,13 +460,6 @@ type ParsedGrepLine = readonly text: string; }; -class GrepAbortedError extends Error { - constructor() { - super('Grep aborted'); - this.name = 'GrepAbortedError'; - } -} - async function runRipgrepOnce( kaos: Kaos, rgArgs: readonly string[], @@ -515,6 +590,214 @@ async function runRipgrepOnce( }; } +interface RipgrepStreamResult { + readonly kind: 'result'; + readonly records: ParsedGrepLine[]; + readonly filteredSensitive: Set; + readonly hasMore: boolean; + readonly timedOut: boolean; + readonly exitCode: number; + readonly stderrText: string; +} + +type RipgrepStreamOutcome = + | RipgrepStreamResult + | { readonly kind: 'tool-error'; readonly result: ExecutableToolResult }; + +/** + * Run ripgrep for `files_with_matches`, streaming records and stopping as soon + * as we have enough (`offset` skipped + `limit` kept). + * + * ripgrep is invoked with `--sortr modified`, so it emits matching files in + * most-recently-modified-first order. We kill it the moment we have `limit` + * non-sensitive matches, so broad patterns in large trees don't force a full + * scan plus a per-file stat from JS. When `limit` is `undefined` (unlimited), + * the stream is read to completion. + * + * Records are split on either NUL (the `--null` runtime format) or newline + * (used by tests and the legacy fallback), whichever appears first. + */ +async function runRipgrepStreaming( + kaos: Kaos, + rgArgs: readonly string[], + signal: AbortSignal, + options: { + readonly offset: number; + readonly limit: number | undefined; + readonly pathClass: PathClass; + }, +): Promise { + if (signal.aborted) { + return { kind: 'tool-error', result: { isError: true, output: 'Grep aborted' } }; + } + + let proc: KaosProcess; + try { + proc = await kaos.exec(...rgArgs); + } catch (error) { + const isEnoent = + error instanceof Error && 'code' in error && (error as NodeJS.ErrnoException).code === 'ENOENT'; + return { + kind: 'tool-error', + result: { + isError: true, + output: isEnoent + ? rgUnavailableMessage(error) + : error instanceof Error + ? error.message + : String(error), + }, + }; + } + + try { + proc.stdin.end(); + } catch { + /* already gone */ + } + + let timedOut = false; + let aborted = false; + let killed = false; + + const killProc = async (): Promise => { + if (killed) return; + killed = true; + try { + await proc.kill('SIGTERM'); + } catch { + /* process already gone */ + } + const exited = proc + .wait() + .then(() => true) + .catch(() => true); + const raced = await Promise.race([ + exited, + new Promise((resolve) => { + setTimeout(() => { + resolve(false); + }, SIGTERM_GRACE_MS); + }), + ]); + if (!raced && proc.exitCode === null) { + try { + await proc.kill('SIGKILL'); + } catch { + /* ignore */ + } + } + }; + + const onAbort = (): void => { + aborted = true; + void killProc(); + }; + signal.addEventListener('abort', onAbort); + if (signal.aborted) onAbort(); + + const timeoutHandle = setTimeout(() => { + timedOut = true; + void killProc(); + }, DEFAULT_TIMEOUT_MS); + + const records: ParsedGrepLine[] = []; + const filteredSensitive = new Set(); + let nonSensitiveSeen = 0; + let limitReached = false; + let stderrText = ''; + let stderrTruncated = false; + + try { + const stdoutDone = (async (): Promise => { + let buffer = ''; + try { + for await (const chunk of proc.stdout) { + if (limitReached) continue; // drain and discard after early-stop + const buf: Buffer = + typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : (chunk as Buffer); + buffer += buf.toString('utf8'); + while (true) { + const nulIndex = buffer.indexOf('\0'); + const newlineIndex = buffer.indexOf('\n'); + if (nulIndex < 0 && newlineIndex < 0) break; + const sepIndex = + nulIndex < 0 + ? newlineIndex + : newlineIndex < 0 + ? nulIndex + : Math.min(nulIndex, newlineIndex); + const filePath = stripTrailingCarriageReturn(buffer.slice(0, sepIndex)); + buffer = buffer.slice(sepIndex + 1); + if (filePath === '') continue; + if (isSensitiveFile(filePath, options.pathClass)) { + filteredSensitive.add(filePath); + continue; + } + nonSensitiveSeen += 1; + if (nonSensitiveSeen <= options.offset) continue; + records.push({ kind: 'record', filePath, payload: '' }); + if (options.limit !== undefined && records.length >= options.limit) { + limitReached = true; + void killProc(); + break; + } + } + } + } catch (error) { + // Tearing the stream down after an early-stop / timeout / abort is + // expected; anything before that is a real failure and should surface. + if (!limitReached && !timedOut && !aborted) throw error; + } + })(); + + const stderrDone = (async (): Promise => { + try { + const result = await readStreamWithCap(proc.stderr, MAX_OUTPUT_BYTES); + stderrText = result.text; + stderrTruncated = result.truncated; + } catch { + /* stderr is best-effort */ + } + })(); + + const [, , exitCode] = await Promise.all([stdoutDone, stderrDone, proc.wait()]); + + if (aborted) { + return { kind: 'tool-error', result: { isError: true, output: 'Grep aborted' } }; + } + + // rg exit codes: 0 = matches, 1 = no matches, 2 = error. When we stop + // early ripgrep is killed (signal exit), which is expected and not an + // error. A non-zero/non-one exit without an early-stop or timeout is a + // real ripgrep error. + if (exitCode !== 0 && exitCode !== 1 && !timedOut && !limitReached) { + return { + kind: 'tool-error', + result: { isError: true, output: formatRipgrepError(exitCode, stderrText, stderrTruncated) }, + }; + } + + return { + kind: 'result', + records, + filteredSensitive, + hasMore: limitReached, + timedOut, + exitCode, + stderrText, + }; + } catch (error) { + return { + kind: 'tool-error', + result: { isError: true, output: error instanceof Error ? error.message : String(error) }, + }; + } finally { + clearTimeout(timeoutHandle); + signal.removeEventListener('abort', onAbort); + } +} + function shouldRetryRipgrepEagain(result: RipgrepRunResult): boolean { return ( result.exitCode !== 0 && @@ -528,61 +811,6 @@ function isEagainRipgrepError(stderr: string): boolean { return stderr.includes('os error 11') || stderr.includes('Resource temporarily unavailable'); } -async function sortFilesWithMatchesByMtime( - lines: readonly ParsedGrepLine[], - kaos: Kaos, - signal: AbortSignal, -): Promise { - const entries = await mapWithConcurrency( - lines, - MTIME_STAT_CONCURRENCY, - signal, - async (line, index) => { - const path = - line.kind === 'record' ? line.filePath : line.kind === 'legacy' ? line.text : undefined; - let mtime = 0; - if (path !== undefined) { - try { - mtime = (await kaos.stat(path)).stMtime ?? 0; - } catch { - // Keep stat failures visible; use mtime=0 so they sort after known files. - } - } - return { line, mtime, index }; - }, - ); - entries.sort((a, b) => b.mtime - a.mtime || a.index - b.index); - return entries.map((entry) => entry.line); -} - -async function mapWithConcurrency( - items: readonly T[], - concurrency: number, - signal: AbortSignal, - mapper: (item: T, index: number) => Promise, -): Promise { - if (signal.aborted) throw new GrepAbortedError(); - if (items.length === 0) return []; - - const results: U[] = []; - results.length = items.length; - let nextIndex = 0; - const workerCount = Math.min(Math.max(1, concurrency), items.length); - await Promise.all( - Array.from({ length: workerCount }, async () => { - while (true) { - if (signal.aborted) return; - const index = nextIndex; - nextIndex += 1; - if (index >= items.length) return; - results[index] = await mapper(items[index] as T, index); - } - }), - ); - if (signal.aborted) throw new GrepAbortedError(); - return results; -} - function buildRgArgs( rgPath: string, args: GrepInput, @@ -605,8 +833,16 @@ function buildRgArgs( cmd.push('--glob', `!${dir}`); } - if (mode === 'files_with_matches') cmd.push('-l'); - else if (mode === 'count_matches') { + if (mode === 'files_with_matches') { + // Delegate "most-recently-modified first" ordering to ripgrep itself. + // Combined with the streaming early-stop in `runRipgrepStreaming`, this + // avoids stat-ing every match from JS (which is one cross-channel + // round-trip per file on remote Kaos backends) while preserving the + // recency ordering UX. ripgrep emits matching files in mtime order and + // we stop reading once we have enough, so broad patterns in large trees + // don't force a full scan + N stat calls. + cmd.push('-l', '--sortr', 'modified'); + } else if (mode === 'count_matches') { // rg omits the filename when only one file is searched, so pin it on. Without // this, the per-file line collapses to a bare count and the summary parser // disagrees with the displayed number. diff --git a/packages/agent-core/test/tools/grep.test.ts b/packages/agent-core/test/tools/grep.test.ts index b31931d59..b20c189f7 100644 --- a/packages/agent-core/test/tools/grep.test.ts +++ b/packages/agent-core/test/tools/grep.test.ts @@ -1,6 +1,6 @@ import { Readable, type Writable } from 'node:stream'; -import type { KaosProcess, StatResult } from '@moonshot-ai/kaos'; +import type { KaosProcess } from '@moonshot-ai/kaos'; import { afterEach, describe, expect, it, vi } from 'vitest'; import { type GrepInput, GrepInputSchema, GrepTool } from '../../src/tools/builtin/file/grep'; @@ -39,6 +39,8 @@ const COMMON_RG_ARGS = [ ] as const; const DEFAULT_RG_ARGS = ['--hidden', ...MAX_COLUMNS_RG_ARGS, ...COMMON_RG_ARGS] as const; const CONTENT_RG_ARGS = ['--hidden', ...COMMON_RG_ARGS] as const; +// `files_with_matches` delegates mtime ordering to ripgrep via `--sortr modified`. +const FILES_WITH_MATCHES_ARGS = ['-l', '--sortr', 'modified'] as const; const SENSITIVE_KEY_BASENAMES = ['id_rsa', 'id_ed25519', 'id_ecdsa'] as const; const SENSITIVE_KEY_RG_ARGS = SENSITIVE_KEY_BASENAMES.flatMap((basename) => [ '--glob', @@ -76,21 +78,6 @@ function processWithOutput(stdout: string, stderr = '', exitCode = 0): KaosProce }; } -function statResult(mtime: number): StatResult { - return { - stMode: 0o100000, - stIno: 1, - stDev: 1, - stNlink: 1, - stUid: 0, - stGid: 0, - stSize: 0, - stAtime: mtime, - stMtime: mtime, - stCtime: mtime, - }; -} - function processThatExitsOnKill(stdout: string, stderr = '', exitCode = 143): KaosProcess { let currentExitCode: number | null = null; let resolveWait: (code: number) => void; @@ -279,7 +266,7 @@ describe('GrepTool', () => { expect(exec).toHaveBeenCalledWith( '/mock/rg', ...DEFAULT_RG_ARGS, - '-l', + ...FILES_WITH_MATCHES_ARGS, ...SENSITIVE_RG_ARGS, '--', 'hit', @@ -297,7 +284,7 @@ describe('GrepTool', () => { expect(exec).toHaveBeenCalledWith( '/mock/rg', ...DEFAULT_RG_ARGS, - '-l', + ...FILES_WITH_MATCHES_ARGS, ...SENSITIVE_RG_ARGS, '--', 'hit', @@ -336,15 +323,15 @@ describe('GrepTool', () => { expect(result.output).toBe('No non-sensitive matches found'); }); - it('sorts files_with_matches by mtime before pagination after sensitive filtering', async () => { - const stdout = ['/workspace/src/old.ts', '/workspace/.env', '/workspace/src/new.ts', ''].join( - '\n', - ); - const stat = vi.fn(async (path: string) => { - if (path === '/workspace/src/new.ts') return statResult(10); - if (path === '/workspace/src/old.ts') return statResult(1); - throw new Error(`unexpected stat: ${path}`); - }); + it('preserves ripgrep mtime order and filters sensitive files before pagination', async () => { + // ripgrep (--sortr modified) emits most-recently-modified first; the + // sensitive file is encountered before the head_limit is reached. + const stdout = [ + nullRecord('/workspace/.env'), + nullRecord('/workspace/src/new.ts'), + nullRecord('/workspace/src/old.ts'), + ].join(''); + const stat = vi.fn(); const tool = new GrepTool( createFakeKaos({ exec: vi.fn().mockResolvedValue(processWithOutput(stdout)), stat }), { workspaceDir: '/workspace', additionalDirs: [] }, @@ -356,99 +343,68 @@ describe('GrepTool', () => { [ 'src/new.ts', 'Filtered 1 sensitive file(s): .env', - 'Results truncated to 1 lines (total: 2). Use offset=1 to see more.', + 'Results truncated to 1 lines (more available). Use offset=1 to see more.', ].join('\n'), ); - expect(stat).toHaveBeenCalledTimes(2); - expect(stat).toHaveBeenCalledWith('/workspace/src/old.ts'); - expect(stat).toHaveBeenCalledWith('/workspace/src/new.ts'); + // mtime ordering is delegated to ripgrep; no stat calls from JS. + expect(stat).not.toHaveBeenCalled(); }); - it('limits concurrent mtime stats while sorting files_with_matches', async () => { - const filePaths = Array.from( - { length: 40 }, - (_, index) => `/workspace/src/file-${String(index).padStart(2, '0')}.ts`, - ); - let activeStats = 0; - let maxActiveStats = 0; - const stat = vi.fn(async (path: string) => { - activeStats += 1; - maxActiveStats = Math.max(maxActiveStats, activeStats); - await new Promise((resolve) => { - setTimeout(resolve, 0); + describe('files_with_matches streaming early-stop', () => { + it('kills ripgrep and reports more available once head_limit is reached', async () => { + const paths = Array.from({ length: 300 }, (_, i) => `/workspace/src/${String(i)}.ts`); + const proc = processWithOutput(paths.map((p) => nullRecord(p)).join('')); + const killSpy = proc.kill as unknown as ReturnType; + const exec = vi.fn().mockResolvedValue(proc); + const tool = new GrepTool(createFakeKaos({ exec }), { + workspaceDir: '/workspace', + additionalDirs: [], }); - activeStats -= 1; - const mtime = Number(path.match(/file-(\d+)\.ts$/)?.[1] ?? 0); - return statResult(mtime); - }); - const tool = new GrepTool( - createFakeKaos({ - exec: vi.fn().mockResolvedValue(processWithOutput(`${filePaths.join('\n')}\n`)), - stat, - }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); - - const result = await executeTool(tool, context({ pattern: 'hit', head_limit: 0 })); - const lines = toolContentString(result).split('\n'); - expect(stat).toHaveBeenCalledTimes(filePaths.length); - expect(maxActiveStats).toBeLessThanOrEqual(32); - expect(lines.at(0)).toBe('src/file-39.ts'); - expect(lines.at(-1)).toBe('src/file-00.ts'); - }); + const result = await executeTool(tool, context({ pattern: 'hit', head_limit: 10 })); + const output = toolContentString(result); - it('stops scheduling mtime stats when aborted during files_with_matches sorting', async () => { - const filePaths = Array.from( - { length: 40 }, - (_, index) => `/workspace/src/file-${String(index).padStart(2, '0')}.ts`, - ); - const abortController = new AbortController(); - const stat = vi.fn(async () => { - abortController.abort(); - await new Promise((resolve) => { - setTimeout(resolve, 0); - }); - return statResult(1); + expect(killSpy).toHaveBeenCalled(); + expect(output.split('\n').filter((line) => line.startsWith('src/'))).toHaveLength(10); + expect(output).toContain('more available'); }); - const tool = new GrepTool( - createFakeKaos({ - exec: vi.fn().mockResolvedValue(processWithOutput(`${filePaths.join('\n')}\n`)), - stat, - }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); - const result = await executeTool(tool, - context({ pattern: 'hit', head_limit: 0 }, abortController.signal), - ); + it('does not kill ripgrep when matches stay under head_limit', async () => { + const paths = Array.from({ length: 5 }, (_, i) => `/workspace/src/${String(i)}.ts`); + const proc = processWithOutput(paths.map((p) => nullRecord(p)).join('')); + const killSpy = proc.kill as unknown as ReturnType; + const exec = vi.fn().mockResolvedValue(proc); + const tool = new GrepTool(createFakeKaos({ exec }), { + workspaceDir: '/workspace', + additionalDirs: [], + }); - expect(result).toMatchObject({ isError: true, output: 'Grep aborted' }); - expect(stat.mock.calls.length).toBeLessThan(filePaths.length); - }); + const result = await executeTool(tool, context({ pattern: 'hit', head_limit: 10 })); + const output = toolContentString(result); - it('keeps files_with_matches entries when mtime stat fails', async () => { - const stdout = [ - '/workspace/src/old.ts', - '/workspace/src/missing.ts', - '/workspace/src/new.ts', - '', - ].join('\n'); - const stat = vi.fn(async (path: string) => { - if (path === '/workspace/src/new.ts') return statResult(10); - if (path === '/workspace/src/old.ts') return statResult(1); - throw new Error('stat failed'); + expect(killSpy).not.toHaveBeenCalled(); + expect(output.split('\n').filter((line) => line.startsWith('src/'))).toHaveLength(5); + expect(output).not.toContain('more available'); }); - const tool = new GrepTool( - createFakeKaos({ exec: vi.fn().mockResolvedValue(processWithOutput(stdout)), stat }), - { workspaceDir: '/workspace', additionalDirs: [] }, - ); - const result = await executeTool(tool, context({ pattern: 'hit', head_limit: 0 })); + it('skips offset records before applying head_limit', async () => { + const paths = Array.from({ length: 50 }, (_, i) => `/workspace/src/${String(i)}.ts`); + const proc = processWithOutput(paths.map((p) => nullRecord(p)).join('')); + const exec = vi.fn().mockResolvedValue(proc); + const tool = new GrepTool(createFakeKaos({ exec }), { + workspaceDir: '/workspace', + additionalDirs: [], + }); - expect(toolContentString(result)).toBe( - ['src/new.ts', 'src/old.ts', 'src/missing.ts'].join('\n'), - ); + const result = await executeTool(tool, context({ pattern: 'hit', offset: 5, head_limit: 3 })); + const output = toolContentString(result); + + expect(output).toContain('src/5.ts'); + expect(output).toContain('src/6.ts'); + expect(output).toContain('src/7.ts'); + expect(output).not.toContain('src/4.ts'); + expect(output).not.toContain('src/8.ts'); + }); }); it('uses count-matches and ignores context flags outside content output mode', async () => { @@ -477,12 +433,14 @@ describe('GrepTool', () => { .mockResolvedValueOnce( processWithOutput('', 'rg: failed to spawn worker: Resource temporarily unavailable\n', 2), ) - .mockResolvedValueOnce(processWithOutput('/workspace/src/a.ts\n')); + .mockResolvedValueOnce(processWithOutput('/workspace/src/a.ts:1:hit\n')); const tool = new GrepTool(createFakeKaos({ exec }), workspace); - const result = await executeTool(tool, context({ pattern: 'hit' })); + // EAGAIN retry lives in the buffered path (content / count_matches); + // files_with_matches is already single-threaded via `--sortr modified`. + const result = await executeTool(tool, context({ pattern: 'hit', output_mode: 'content' })); - expect(toolContentString(result)).toBe('src/a.ts'); + expect(toolContentString(result)).toBe('src/a.ts:1:hit'); expect(exec).toHaveBeenCalledTimes(2); expect(exec.mock.calls[0]).not.toContain('-j'); expect(exec).toHaveBeenNthCalledWith( @@ -490,8 +448,9 @@ describe('GrepTool', () => { '/mock/rg', '-j', '1', - ...DEFAULT_RG_ARGS, - '-l', + ...CONTENT_RG_ARGS, + '--with-filename', + '-n', ...SENSITIVE_RG_ARGS, '--', 'hit', @@ -516,7 +475,7 @@ describe('GrepTool', () => { expect(exec).toHaveBeenCalledWith( '/mock/rg', ...DEFAULT_RG_ARGS, - '-l', + ...FILES_WITH_MATCHES_ARGS, '-i', '--type', 'ts', @@ -607,7 +566,7 @@ describe('GrepTool', () => { expect(exec).toHaveBeenCalledWith( '/mock/rg', ...DEFAULT_RG_ARGS, - '-l', + ...FILES_WITH_MATCHES_ARGS, '--glob', '**/.env', ...SENSITIVE_RG_ARGS, @@ -1019,7 +978,7 @@ describe('GrepTool', () => { expect(lines.slice(0, 250)).toEqual(displayPaths.slice(0, 250)); expect(output).not.toContain(displayPaths[250]); expect(output).toContain( - 'Results truncated to 250 lines (total: 251). Use offset=250 to see more.', + 'Results truncated to 250 lines (more available). Use offset=250 to see more.', ); });