diff --git a/packages/analysis/src/group/cross-repo-links.ts b/packages/analysis/src/group/cross-repo-links.ts index d432e110..f0638171 100644 Binary files a/packages/analysis/src/group/cross-repo-links.ts and b/packages/analysis/src/group/cross-repo-links.ts differ diff --git a/packages/analysis/src/test-utils.ts b/packages/analysis/src/test-utils.ts index 209b51a5..da1ee086 100644 --- a/packages/analysis/src/test-utils.ts +++ b/packages/analysis/src/test-utils.ts @@ -11,7 +11,7 @@ * `traverseDescendants`, `traverse`, plus the ITemporalStore-compat noops. * * Per-test fixtures populate the store via `addNode` / `addEdge`; the test - * then exercises the production code through the same finders the DuckDb + * then exercises the production code through the same finders the SQLite store * and GraphDb adapters expose. No raw SQL crosses the test boundary. */ diff --git a/packages/analysis/src/verdict.ts b/packages/analysis/src/verdict.ts index 24a77121..9e20ec2e 100644 --- a/packages/analysis/src/verdict.ts +++ b/packages/analysis/src/verdict.ts @@ -15,7 +15,7 @@ * Contributors) never crashes the verdict; it simply drops the missing * signal. * - **Zero `any`**: the only loose type surface is `Record` - * for raw DuckDB rows, each of which we narrow with explicit casts. + * for raw SQLite rows, each of which we narrow with explicit casts. */ import { execFile } from "node:child_process"; diff --git a/packages/cli/src/commands/analyze.ts b/packages/cli/src/commands/analyze.ts index e8a9c46a..3e0cd1c9 100644 --- a/packages/cli/src/commands/analyze.ts +++ b/packages/cli/src/commands/analyze.ts @@ -56,7 +56,7 @@ export interface AnalyzeOptions { readonly force?: boolean; /** * When true, the embeddings phase embeds every callable/declaration symbol - * and the result is upserted into the DuckDB `embeddings` table. Requires + * and the result is upserted into the `embeddings` table. 
Requires * `codehub setup --embeddings` to have installed weights; if weights are * missing the phase logs a warning and skips — analyze never aborts. */ @@ -222,7 +222,7 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi // Load a prior graph projection for the incremental-scope phase when the // CLI was not invoked with --force. The projection is a thin wrapper - // around the prior DuckDB index (File nodes + IMPORTS / EXTENDS / + // around the prior SQLite index (File nodes + IMPORTS / EXTENDS / // IMPLEMENTS edges). `loadPreviousGraph` silently returns undefined if // the store does not exist or cannot be opened; incremental-scope then // reports mode="full" with reason="no-prior-graph". @@ -333,10 +333,9 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi ); } - // Persist to the composed graph + temporal store. Storage is always - // graph.lbug (graph-tier) + temporal.duckdb sidecar (cochanges, summary - // cache); the temporal-tier writes (`bulkLoadCochanges`, - // `bulkLoadSymbolSummaries`) route through `store.temporal`. + // Persist to the composed graph + temporal store. Post-ADR 0019 both views + // are one `store.sqlite`; the temporal-tier writes (`bulkLoadCochanges`, + // `bulkLoadSymbolSummaries`) still route through `store.temporal`. await mkdir(resolveRepoMetaDir(repoPath), { recursive: true }); const dbPath = resolveGraphPath(repoPath); const store: Store = await openStore({ path: dbPath }); @@ -423,7 +422,7 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi // Persist the scan-state sidecar so the next analyze invocation can feed // the incremental-scope phase via loadPreviousGraph(). We write this - // alongside the DuckDB file under `/.codehub` so a clean of the + // alongside the store.sqlite file under `/.codehub` so a clean of the // meta dir invalidates both the index and the incremental state together. 
if (result.scan !== undefined) { await writeScanState( @@ -434,7 +433,7 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi // Opt-in skill generation. Walk Community nodes just persisted above and // emit one SKILL.md per cluster under `/.codehub/skills/`. Runs - // against the still-open DuckDB handle so there's no re-open cost, and + // against the still-open SQLite handle so there's no re-open cost, and // any per-skill failure (read-only dir, permission denied, disk full) // logs-and-continues — analyze never aborts because of a skill write. if (opts.skills === true) { @@ -579,7 +578,7 @@ export async function runAnalyze(path: string, opts: AnalyzeOptions = {}): Promi /** * Build the {@link pipeline.PreviousGraph} projection expected by the - * incremental-scope phase from the prior DuckDB index + scan-state sidecar. + * incremental-scope phase from the prior SQLite index + scan-state sidecar. * * The projection carries: * - file paths + scan-time content hashes, read from @@ -827,7 +826,7 @@ export async function resolveCoverageEnabled( * compute that before the pipeline runs (LSP phases haven't yielded * yet), so we use the prior run's stored counts when available: * - * - If a DuckDB store is readable at the expected path, count nodes + * - If a SQLite store is readable at the expected path, count nodes * whose kind is Function/Method/Class. That count is the best proxy * for "SCIP-confirmed callables" we can get before the parse phase. * - If no prior store exists (fresh clone, first analyze), fall back @@ -863,7 +862,7 @@ export async function resolveMaxSummariesCap( /** * Count callable symbols (Function / Method / Class) recorded by the - * prior run. Returns `undefined` when no prior DuckDB index exists or + * prior run. Returns `undefined` when no prior SQLite index exists or * the count query fails — callers treat that as "no prior run" and fall * back to the first-run heuristic. 
*/ @@ -893,7 +892,7 @@ async function countPriorCallableSymbols(repoPath: string): Promise = new Set(NODE_KINDS); const RELATION_TYPE_SET: ReadonlySet = new Set(RELATION_TYPES); @@ -1015,7 +1014,7 @@ function boolField(r: Record, col: string): boolean | undefined } function stringArrayField(r: Record, col: string): readonly string[] | undefined { - // Preserve `[]` distinct from absent. The DuckDB TEXT[] binder returns + // Preserve `[]` distinct from absent. The storage string-array binder returns // a 0-length JS array for an empty SQL array literal and `null` for // SQL NULL; mirror the storage adapter's `setStringArrayField` and // return the array verbatim so a Community / Route node written as diff --git a/packages/cli/src/commands/code-pack.ts b/packages/cli/src/commands/code-pack.ts index d3f04cc6..264cc575 100644 --- a/packages/cli/src/commands/code-pack.ts +++ b/packages/cli/src/commands/code-pack.ts @@ -61,7 +61,7 @@ export interface CodePackArgs { readonly engine?: "pack" | "repomix"; /** * Test seam — inject a custom `generatePack` so unit tests don't need - * to load native DuckDB bindings. Production callers leave this + * to load native storage bindings. Production callers leave this * unset. */ readonly _generatePack?: typeof generatePack; diff --git a/packages/cli/src/commands/ingest-sarif.ts b/packages/cli/src/commands/ingest-sarif.ts index 4cbabef8..1a86c5b9 100644 --- a/packages/cli/src/commands/ingest-sarif.ts +++ b/packages/cli/src/commands/ingest-sarif.ts @@ -5,7 +5,7 @@ * Flow: * 1. Read + parse + validate the SARIF file via `@opencodehub/sarif`. * 2. Resolve the target repo (either `--repo ` or CWD). - * 3. Open the DuckDB store and pull a per-file, line-sorted symbol + * 3. Open the SQLite store and pull a per-file, line-sorted symbol * index over the SARIF's referenced URIs (used to resolve Finding * → Symbol edges). * 4. 
For every Result across every Run, build a Finding node keyed by @@ -15,7 +15,7 @@ * enclosing symbol at `(uri, startLine)` when the graph contains * one. A scanner-provided `opencodehub.symbolId` hint wins over the * enclosing lookup when set. - * 5. UPSERT into DuckDB via `store.bulkLoad({ mode: "upsert" })`. + * 5. UPSERT into the SQLite store via `store.bulkLoad({ mode: "upsert" })`. * * The command is idempotent — re-running with the same SARIF produces * the same nodes and edges. Results without a parsable location (no @@ -140,7 +140,7 @@ interface BuildSummary { /** * Pure builder over SARIF runs. Exposed for unit tests so we can exercise - * the node/edge emission logic without touching DuckDB. + * the node/edge emission logic without touching SQLite. * * `nodesByFile` is the per-file, line-sorted symbol index (produced by * {@link indexNodesByFile}) used to resolve each SARIF result back to the diff --git a/packages/cli/src/commands/list.ts b/packages/cli/src/commands/list.ts index 8263320e..e0129123 100644 --- a/packages/cli/src/commands/list.ts +++ b/packages/cli/src/commands/list.ts @@ -34,8 +34,8 @@ type Health = "ok" | "path-missing" | "graph-missing"; function classifyHealth(entry: RepoEntry): Health { if (!existsSync(entry.path)) return "path-missing"; - // Indexed probe: presence of `meta.json` / `graph.lbug` under `.codehub/` - // counts as "indexed" (lbug is the only graph backend post-ADR 0016). + // Indexed probe: presence of `meta.json` / `store.sqlite` under `.codehub/` + // counts as "indexed" (the single-file store is the only backend, ADR 0019). 
if (!codehubIsIndexed(entry.path)) return "graph-missing"; return "ok"; } diff --git a/packages/cli/src/commands/open-store.ts b/packages/cli/src/commands/open-store.ts index 6975dca5..37078fb6 100644 --- a/packages/cli/src/commands/open-store.ts +++ b/packages/cli/src/commands/open-store.ts @@ -6,9 +6,9 @@ * Returns the canonical {@link Store} envelope from `@opencodehub/storage` * so callers can route graph-tier queries through `store.graph` and * temporal-tier queries (cochanges, summaries, `--sql` escape hatch) - * through `store.temporal`. Storage is always graph.lbug + temporal.duckdb; - * the legacy backend selector was removed when the DuckDB graph backend - * was ripped out (see ADR 0016). + * through `store.temporal`. Post-ADR 0019 both views are one `SqliteStore` + * over a single `/.codehub/store.sqlite`. The legacy backend selector was + * already removed with the DuckDB graph backend (see ADR 0016). */ import { resolve } from "node:path"; @@ -33,7 +33,7 @@ export async function openStoreForCommand(opts: OpenStoreOptions): Promise Promise; } diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 2ecb95b0..cb9ddeb5 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -3,7 +3,7 @@ * `codehub` CLI entrypoint. * * Every subcommand is loaded lazily via `await import(...)` so that - * `codehub --help` (and `codehub --help`) stays fast: no DuckDB + * `codehub --help` (and `codehub --help`) stays fast: no storage-engine * native binding, no pipeline, no MCP SDK unless we are actually going to * run that subcommand. */ diff --git a/packages/cli/src/lib/is-indexed.ts b/packages/cli/src/lib/is-indexed.ts index 19b8ac6c..59171734 100644 --- a/packages/cli/src/lib/is-indexed.ts +++ b/packages/cli/src/lib/is-indexed.ts @@ -3,7 +3,7 @@ * either signal exists under `/.codehub`: * * - `meta.json` — written by every successful analyze run. - * - `graph.lbug` — the lbug graph artifact (post-M7 the only graph backend). 
+ * - `store.sqlite` — the single-file index (ADR 0019; the only backend). * * Returns a plain boolean — UI surfaces (e.g. `codehub list`) want a single * column rendering. diff --git a/packages/cli/src/skills-gen.ts b/packages/cli/src/skills-gen.ts index fd1db32b..05f22c05 100644 --- a/packages/cli/src/skills-gen.ts +++ b/packages/cli/src/skills-gen.ts @@ -201,7 +201,7 @@ async function fetchProcessEntryPointIds(store: SkillsGenStore): Promise` (`languageStats: {}`) goes through a diff --git a/packages/ingestion/src/pipeline/orchestrator.ts b/packages/ingestion/src/pipeline/orchestrator.ts index 2679142f..637eb41b 100644 --- a/packages/ingestion/src/pipeline/orchestrator.ts +++ b/packages/ingestion/src/pipeline/orchestrator.ts @@ -3,7 +3,7 @@ * configured phase set, and returns a summary plus the hashed graph. * * The orchestrator does not touch storage — the returned - * `KnowledgeGraph` is in-memory only. Persisting it (DuckDB / embeddings) + * `KnowledgeGraph` is in-memory only. Persisting it (SQLite / embeddings) * is a CLI concern (see `codehub analyze`, which opens a writable store * and calls `bulkLoad`). */ @@ -133,7 +133,7 @@ export interface RunIngestionOptions extends PipelineOptions { readonly onProgress?: (ev: ProgressEvent) => void; /** * Optional adapter the summarize phase probes before issuing work. - * Production wires this to the DuckDB store's `lookupSymbolSummary` + * Production wires this to the SQLite store's `lookupSymbolSummary` * implementation so re-indexes become free when source hasn't drifted. * Tests inject an in-memory fake. Absent by default — the phase degrades * to "every candidate is a miss" which is still correct, just more @@ -142,7 +142,7 @@ export interface RunIngestionOptions extends PipelineOptions { readonly summaryCacheAdapter?: SummaryCacheAdapter; /** * Optional adapter the embeddings phase probes before issuing embedder - * calls. Production wires this to the DuckDB store's + * calls. 
Production wires this to the SQLite store's * `listEmbeddingHashes` implementation so re-analyze runs skip chunks * whose `content_hash` matches a prior row. Absent by default — * the phase degrades to "every chunk is new" which is still correct, diff --git a/packages/ingestion/src/pipeline/phases/cochange.ts b/packages/ingestion/src/pipeline/phases/cochange.ts index 671cb22b..320a6a2a 100644 Binary files a/packages/ingestion/src/pipeline/phases/cochange.ts and b/packages/ingestion/src/pipeline/phases/cochange.ts differ diff --git a/packages/ingestion/src/pipeline/phases/embeddings.ts b/packages/ingestion/src/pipeline/phases/embeddings.ts index 70a80e19..91a15778 100644 --- a/packages/ingestion/src/pipeline/phases/embeddings.ts +++ b/packages/ingestion/src/pipeline/phases/embeddings.ts @@ -1,7 +1,7 @@ /** * Embeddings phase — generates 768-dim vectors across one or more * hierarchical tiers and materialises them into the phase output as an - * array of `EmbeddingRow`s the CLI upserts into DuckDB. + * array of `EmbeddingRow`s the CLI upserts into the SQLite store. * * Granularity tiers (P03): * - `"symbol"` — one vector per callable/declaration symbol. When a diff --git a/packages/ingestion/src/pipeline/phases/summarize.ts b/packages/ingestion/src/pipeline/phases/summarize.ts index 5dbfe66f..c02d5231 100644 --- a/packages/ingestion/src/pipeline/phases/summarize.ts +++ b/packages/ingestion/src/pipeline/phases/summarize.ts @@ -236,7 +236,7 @@ async function runSummarize(ctx: PipelineContext): Promise // Resolve a cache adapter from the options bag if the CLI attached one. // Phases have no direct store handle, so we route cache probes through a - // narrow hook on `ctx.options`. Production attaches the DuckDB-backed + // narrow hook on `ctx.options`. Production attaches the SQLite-backed // adapter; tests supply an in-memory fake. 
const cacheAdapter = resolveCacheAdapter(ctx); diff --git a/packages/ingestion/src/pipeline/types.ts b/packages/ingestion/src/pipeline/types.ts index 643e164b..f556e682 100644 --- a/packages/ingestion/src/pipeline/types.ts +++ b/packages/ingestion/src/pipeline/types.ts @@ -32,7 +32,7 @@ export interface PipelineContext { * Minimal projection of a prior-run graph sufficient for the incremental-scope * phase to compute the import-closure walk. We intentionally keep this * narrower than a full {@link KnowledgeGraph} so callers can materialise - * it cheaply from persisted storage (DuckDB rows, sidecar JSON, etc.) without + * it cheaply from persisted storage (SQLite rows, sidecar JSON, etc.) without * hydrating every node/edge kind in the graph. * * All arrays carry repo-relative posix paths (matching `ScannedFile.relPath`). diff --git a/packages/ingestion/src/providers/resolution/context.ts b/packages/ingestion/src/providers/resolution/context.ts index eedb3103..0d98f1a3 100644 --- a/packages/ingestion/src/providers/resolution/context.ts +++ b/packages/ingestion/src/providers/resolution/context.ts @@ -27,7 +27,7 @@ export interface ResolutionCandidate { /** * Minimal symbol-lookup surface. Concrete implementations sit atop the - * DuckDB-backed `IGraphStore`, but every resolver strategy speaks to this + * SQLite-backed `IGraphStore`, but every resolver strategy speaks to this * interface so unit tests can drive it with in-memory fixtures. */ export interface SymbolIndex { diff --git a/packages/mcp/src/connection-pool.ts b/packages/mcp/src/connection-pool.ts index 41a8420f..0c826d29 100644 --- a/packages/mcp/src/connection-pool.ts +++ b/packages/mcp/src/connection-pool.ts @@ -8,8 +8,8 @@ * LRU: * * 1. Per-key promise dedupe. Concurrent acquires for the same repo share - * a single in-flight open() — otherwise DuckDB will raise on the - * second connection opening the same file in read-write mode. 
+ * a single in-flight open() — so the WAL-mode `store.sqlite` file is + * opened once per repo rather than racing multiple handles. * 2. Reference counting. Release must decrement a per-entry counter; an * eviction that lands on a still-in-use entry MUST NOT close it. We * set `closePending` and park the entry in a side table (it has left @@ -22,11 +22,11 @@ * `shutdown()` drains the pool on stdio close so the server exits cleanly. * * The pool caches the composed `OpenStoreResult` so MCP tools can route - * graph-tier calls through `store.graph` (lbug at `/.codehub/graph.lbug`) - * and temporal-tier calls (cochanges, summaries, `--sql` escape hatch) - * through `store.temporal` (DuckDB at `/.codehub/temporal.duckdb`). - * `OpenStoreResult.close()` is the deterministic composite close — graph - * first, then temporal. + * graph-tier calls through `store.graph` and temporal-tier calls + * (cochanges, summaries, `--sql` escape hatch) through `store.temporal`. + * Post-ADR 0019 both views are the SAME `SqliteStore` over one + * `/.codehub/store.sqlite` file. `OpenStoreResult.close()` is the + * deterministic composite close — it releases that single handle once. */ import { openStore, type Store } from "@opencodehub/storage"; @@ -50,15 +50,15 @@ const DEFAULT_TTL_MS = 15 * 60 * 1000; /** * Factory indirection keeps tests mockable without standing up the - * underlying database. Production always calls `openStore`, which - * composes the lbug graph view and the DuckDB temporal view from the - * `/.codehub/` parent directory. + * underlying database. Production always calls `openStore`, which returns + * one `SqliteStore` as both the graph and temporal views over the single + * `/.codehub/store.sqlite` file. */ export type StoreFactory = (dbPath: string) => Promise; const defaultFactory: StoreFactory = async (dbPath) => { - // openStore composes graph (lbug) + temporal (DuckDB) views from the - // shared `/.codehub/` parent. 
We open read-only because every + // openStore serves graph + temporal from one SqliteStore over the + // shared `/.codehub/store.sqlite`. We open read-only because every // MCP tool is a reader; the ingestion pipeline owns writes and runs // out-of-process. const store = await openStore({ path: dbPath, readOnly: true }); @@ -114,7 +114,7 @@ export class ConnectionPool { /** * Acquire a store handle for the given repo. The caller MUST pair every * acquire with a release. The `dbPath` argument is the absolute path to - * the on-disk DuckDB file; `repoKey` is a stable identifier used for + * the on-disk store.sqlite file; `repoKey` is a stable identifier used for * caching (usually the absolute repo path). */ async acquire(repoKey: string, dbPath: string): Promise { diff --git a/packages/mcp/src/repo-resolver.ts b/packages/mcp/src/repo-resolver.ts index 85e2c149..6b63c8a9 100644 --- a/packages/mcp/src/repo-resolver.ts +++ b/packages/mcp/src/repo-resolver.ts @@ -1,6 +1,6 @@ /** * Resolve a user-supplied (or omitted) `repo` argument to a concrete repo - * path, DuckDB file path, and (optionally) cached store metadata. + * path, store.sqlite file path, and (optionally) cached store metadata. * * The authoritative mapping lives at `~/.codehub/registry.json`. Callers * who pass a name look it up there. When `repo` is omitted: diff --git a/packages/mcp/src/resources/store-helper.ts b/packages/mcp/src/resources/store-helper.ts index 9ceb04d0..e17cbe0c 100644 --- a/packages/mcp/src/resources/store-helper.ts +++ b/packages/mcp/src/resources/store-helper.ts @@ -4,7 +4,7 @@ * Mirrors the tool-side `withStore` but returns shape appropriate for * resources: instead of `CallToolResult`, resource callbacks return * `ReadResourceResult`, which carries a plain text body. 
Errors - * (missing pool, repo not found, DuckDB open failure) are surfaced as a + * (missing pool, repo not found, SQLite open failure) are surfaced as a * YAML error envelope inside the resource body so the agent sees the * problem inline rather than receiving a transport-level fault. */ diff --git a/packages/mcp/src/server.ts b/packages/mcp/src/server.ts index f413640d..09f59baa 100644 --- a/packages/mcp/src/server.ts +++ b/packages/mcp/src/server.ts @@ -65,7 +65,7 @@ const INSTRUCTIONS = [ "Every per-repo tool (`query`, `context`, `impact`, `detect_changes`, `sql`, `scan`, `list_findings`, `list_findings_delta`, `list_dead_code`, `license_audit`, `project_profile`, `dependencies`, `owners`, `risk_trends`, `verdict`, `change_pack`) accepts an optional `repo` argument (registry name) or a `repo_uri` alias (Sourcegraph-style URI like `github.com/org/repo`, or `local:` for unpublished repos; wins when both are provided). When exactly one repo is registered, both are optional and the tool defaults to that repo. 
When ≥ 2 repos are registered and neither is supplied, the tool returns `AMBIGUOUS_REPO` — the structured envelope carries `structuredContent.error.choices[]` (capped at 10, with `{repo_uri, default_branch, group}`) plus `total_matches`, so a caller can retry with one of `choices[].repo_uri`.", "Every tool response includes a `next_steps` array under structuredContent and a `_meta.codehub/staleness` entry when the index may be behind HEAD.", "Use `query` to locate symbols, `context` for a 360-degree view, `impact` for blast radius (plan a refactor before you edit — OpenCodeHub does not edit source), `detect_changes` to map a diff to flows (verify a refactor after you apply it), `dependencies` for the external package list, `license_audit` for a copyleft/unknown/proprietary tier check of dependencies, `list_findings` to browse SARIF findings, `list_findings_delta` to diff the latest scan against a frozen baseline (new/fixed/unchanged/updated buckets), `scan` to run Priority-1 scanners (openWorld — spawns processes), `verdict` for a 5-tier PR decision (exit codes 0/1/2), `change_pack` for a deterministic diff-scoped pack (impacted subgraph + verdict + affected tests + char-heuristic cost estimate; CI-oriented), `risk_trends` for per-community trend lines and 30-day projections, and `sql` for bespoke queries.", - "For cross-repo work, call `group_list` to discover named repo groups, then `group_query`/`group_status` to fan out BM25 search and staleness across the group. `group_query` returns `{ group, query, results: [{ _repo, _rrf_score, ... }], per_repo, warnings }`; results are tagged with the source repo and per-repo errors surface in `per_repo[].error` + `warnings[]` (the fan-out never aborts on a single-repo failure). 
Use `group_sync` to materialize a cross-repo contract registry (HTTP / gRPC / topic) under `~/.codehub/groups//contracts.json`, then `group_contracts` to list the DuckDB-backed FETCHES↔Route edges together with the registry's signature-matched cross-links.", + "For cross-repo work, call `group_list` to discover named repo groups, then `group_query`/`group_status` to fan out BM25 search and staleness across the group. `group_query` returns `{ group, query, results: [{ _repo, _rrf_score, ... }], per_repo, warnings }`; results are tagged with the source repo and per-repo errors surface in `per_repo[].error` + `warnings[]` (the fan-out never aborts on a single-repo failure). Use `group_sync` to materialize a cross-repo contract registry (HTTP / gRPC / topic) under `~/.codehub/groups//contracts.json`, then `group_contracts` to list the graph-derived FETCHES↔Route edges together with the registry's signature-matched cross-links.", ].join(" "); export interface StartServerOptions { diff --git a/packages/mcp/src/test-utils.ts b/packages/mcp/src/test-utils.ts index 6baed83c..b64f0824 100644 --- a/packages/mcp/src/test-utils.ts +++ b/packages/mcp/src/test-utils.ts @@ -70,15 +70,16 @@ import { ConnectionPool } from "./connection-pool.js"; /** * Wrap an in-memory IGraphStore-shaped fake as the composed `Store` * (`OpenStoreResult`) that the connection pool returns. The same fake - * instance backs both `graph` and `temporal` views — tests don't care - * about the production split between lbug graph + DuckDB temporal. + * instance backs both `graph` and `temporal` views — which mirrors + * production, where one `SqliteStore` serves both over a single + * `store.sqlite` (ADR 0019). 
*/ export function wrapAsStore(fake: unknown): Store { return { graph: fake as IGraphStore, temporal: fake as ITemporalStore, - graphFile: "/in-memory/graph.lbug", - temporalFile: "/in-memory/temporal.duckdb", + graphFile: "/in-memory/store.sqlite", + temporalFile: "/in-memory/store.sqlite", close: async () => { const closer = (fake as { close?: () => Promise }).close; if (typeof closer === "function") await closer.call(fake); diff --git a/packages/mcp/src/tools/group-contracts.ts b/packages/mcp/src/tools/group-contracts.ts index 4a5acba5..69ce08fe 100644 --- a/packages/mcp/src/tools/group-contracts.ts +++ b/packages/mcp/src/tools/group-contracts.ts @@ -218,7 +218,7 @@ export async function runGroupContracts( const persisted = await loadPersistedRegistry(group.name, ctx.home); const crossLinks = persisted?.crossLinks ?? []; - const header = `Cross-repo contracts for group ${group.name}: ${contracts.length} DuckDB-backed HTTP edge(s), ${crossLinks.length} registry cross-link(s).`; + const header = `Cross-repo contracts for group ${group.name}: ${contracts.length} graph-derived HTTP edge(s), ${crossLinks.length} registry cross-link(s).`; const body = contracts.length === 0 ? "(no FETCH-derived contracts — verify consumer repos ran the `fetches` phase and producer repos registered Route nodes)" @@ -282,8 +282,8 @@ export async function runGroupContracts( /** * Load `/.codehub/groups//contracts.json`. Returns `null` * when the file does not exist or fails to parse — the tool still - * succeeds on the DuckDB-backed surface when the persisted file is - * missing. + * succeeds on the graph-derived FETCHES↔Route surface when the persisted + * file is missing. */ async function loadPersistedRegistry( groupName: string, @@ -305,7 +305,7 @@ export function registerGroupContractsTool(server: McpServer, ctx: ToolContext): { title: "Cross-repo HTTP contracts + cross-links", description: - "Two-part surface for cross-repo contract discovery. 
(1) Match unresolved FETCHES edges (consumer) against Route nodes (producer) across every repo in the group — this is the DuckDB-backed HTTP surface. (2) When `group_sync` has written a contracts.json under `/.codehub/groups//`, surface its cross-links with signature + file + line for HTTP, gRPC, and topic pairings. Use this to audit cross-repo coupling after a schema change in a shared API or proto.", + "Two-part surface for cross-repo contract discovery. (1) Match unresolved FETCHES edges (consumer) against Route nodes (producer) across every repo in the group — this is the graph-derived HTTP surface. (2) When `group_sync` has written a contracts.json under `/.codehub/groups//`, surface its cross-links with signature + file + line for HTTP, gRPC, and topic pairings. Use this to audit cross-repo coupling after a schema change in a shared API or proto.", inputSchema: GroupContractsInput, annotations: { readOnlyHint: true, diff --git a/packages/mcp/src/tools/group-query.ts b/packages/mcp/src/tools/group-query.ts index 5fcc94cf..108e530c 100644 --- a/packages/mcp/src/tools/group-query.ts +++ b/packages/mcp/src/tools/group-query.ts @@ -12,7 +12,7 @@ * * Determinism: * - Repo iteration is the alphabetical sort of `group.repos[*].name`. - * - BM25 ties are broken by `id ASC` in the DuckDB adapter. + * - BM25 ties are broken by `id ASC` in the SQLite adapter. * - RRF tiebreak falls back to lex `(_repo, nodeId)` ordering (the * underlying `rrf()` breaks ties by first-run / first-rank; we do the * final lex pass ourselves to keep cross-run order stable). @@ -20,7 +20,7 @@ * don't silently truncate and large groups don't pull unnecessarily. * * Graceful degradation: - * - A repo missing from the registry, a missing DB file, or a DuckDB + * - A repo missing from the registry, a missing DB file, or a SQLite * open error all emit a `per_repo[]` row with an `error` string and a * human-readable entry in `warnings[]`. 
The fan-out continues; the * tool never aborts unless the group itself is unknown. diff --git a/packages/mcp/src/tools/pack-codebase.ts b/packages/mcp/src/tools/pack-codebase.ts index 4073e978..b922fa4c 100644 --- a/packages/mcp/src/tools/pack-codebase.ts +++ b/packages/mcp/src/tools/pack-codebase.ts @@ -267,7 +267,7 @@ async function callRealPackEngine(args: { }): Promise<{ outDir: string; packHash: string; bomItemCount: number }> { // Inline the same wiring as `runCodePack` rather than importing // `@opencodehub/cli` (which would create a cycle, MCP <- CLI <- MCP). - // Open the DuckStore directly, call generatePack, rename into place. + // Open the store directly, call generatePack, rename into place. const { mkdtemp, rename, rm } = await import("node:fs/promises"); const { tmpdir } = await import("node:os"); const { join, resolve } = await import("node:path"); @@ -275,7 +275,7 @@ async function callRealPackEngine(args: { const dbPath = resolveGraphPath(args.repo); if (!existsSync(dbPath)) { throw new Error( - `pack_codebase: no graph index at ${dbPath} (expected .codehub/graph.lbug). ` + + `pack_codebase: no index at ${dbPath} (expected .codehub/store.sqlite). ` + "Run `codehub analyze` first to populate the store.", ); } diff --git a/packages/mcp/src/tools/project-profile.ts b/packages/mcp/src/tools/project-profile.ts index 6eaaeed2..3c5277ef 100644 --- a/packages/mcp/src/tools/project-profile.ts +++ b/packages/mcp/src/tools/project-profile.ts @@ -2,7 +2,7 @@ * `project_profile` — return the ProjectProfile node for a repo. * * Profile is a singleton per repo, emitted by the ingestion `profile` phase. - * Each array field is stored in DuckDB as a JSON-encoded TEXT column + * Each array field is stored in SQLite as a JSON-encoded TEXT column * (`languages_json`, `frameworks_json`, etc.) so we decode every column * back into a `string[]` before returning. 
If the repo was indexed before * the profile phase shipped (or the phase failed to write the node), we diff --git a/packages/mcp/src/tools/query.test.ts b/packages/mcp/src/tools/query.test.ts index e701f873..139cf6f8 100644 --- a/packages/mcp/src/tools/query.test.ts +++ b/packages/mcp/src/tools/query.test.ts @@ -331,8 +331,8 @@ function makeFakeStore(opts: FakeStoreOptions): FakeStoreHandle { setMeta: async (_m: StoreMeta): Promise => {}, healthCheck: async () => ({ ok: true }), // ITemporalStore.exec — `bm25CorpusHasSummaries` calls this with two - // information_schema / count probes. Mirror the original SQL-regex - // dispatcher's responses for those exact texts. + // probes: a `sqlite_master` table-existence check (ADR 0019; node:sqlite + // has no information_schema) and a row count. Mirror those exact texts. exec: async ( sql: string, _params: readonly SqlParam[] = [], @@ -340,7 +340,7 @@ function makeFakeStore(opts: FakeStoreOptions): FakeStoreHandle { const normalized = sql.replace(/\s+/g, " ").trim(); if ( normalized === - "SELECT COUNT(*) AS n FROM information_schema.tables WHERE table_name = 'symbol_summaries'" + "SELECT COUNT(*) AS n FROM sqlite_master WHERE type = 'table' AND name = 'symbol_summaries'" ) { return [{ n: summariesPresent ? 1 : 0 }]; } diff --git a/packages/mcp/src/tools/query.ts b/packages/mcp/src/tools/query.ts index a894acb4..86b5ee7c 100644 --- a/packages/mcp/src/tools/query.ts +++ b/packages/mcp/src/tools/query.ts @@ -2,7 +2,7 @@ * `query` — true hybrid retrieval over the indexed graph. * * Two ranked runs, fused with Reciprocal Rank Fusion (k=60): - * 1. BM25 (DuckDB FTS) over `nodes.name` + `nodes.signature` + + * 1. BM25 (SQLite FTS5) over `nodes.name` + `nodes.signature` + * `nodes.description`. If a `symbol_summaries` table is present the * corpus extends transparently (see {@link bm25CorpusHasSummaries}) so * summarized prose participates as soon as the ingestion phase lands. 
@@ -239,18 +239,20 @@ async function lookupSummariesForHits( /** * Extensibility hook: return true iff the `symbol_summaries` table exists * and is non-empty. When it does, future BM25 upgrades can JOIN it into - * the FTS corpus. Today this is informational — the DuckDB FTS index is + * the FTS corpus. Today this is informational — the SQLite FTS5 index is * built at ingestion time against `nodes` columns only — but the probe * lives here so the sibling summarizer work can light up a corpus * extension without re-threading the tool. */ async function bm25CorpusHasSummaries(temporal: ITemporalStore): Promise { - // information_schema introspection is DuckDB-specific; route via the - // temporal-tier `exec` escape hatch so a future graph-only adapter - // pairing with a non-DuckDB temporal store can override this probe. + // Table-existence introspection via SQLite's `sqlite_master` catalog, + // routed through the temporal-tier `exec` escape hatch. (Pre-ADR-0019 + // this probed DuckDB's `information_schema.tables`, which node:sqlite does + // not expose.) A future graph-only adapter pairing with a non-SQLite + // temporal store can override this probe. try { const rows = await temporal.exec( - "SELECT COUNT(*) AS n FROM information_schema.tables WHERE table_name = 'symbol_summaries'", + "SELECT COUNT(*) AS n FROM sqlite_master WHERE type = 'table' AND name = 'symbol_summaries'", ); const first = rows[0]; if (!first) return false; diff --git a/packages/mcp/src/tools/shared.ts b/packages/mcp/src/tools/shared.ts index 102bcbdb..d061b603 100644 --- a/packages/mcp/src/tools/shared.ts +++ b/packages/mcp/src/tools/shared.ts @@ -127,7 +127,7 @@ export interface RepoArgs { /** * Acquire a store for the given repo argument, invoke `fn`, and release * the handle unconditionally. Errors from repo resolution become - * structured NO_INDEX/NOT_FOUND envelopes; DuckDB errors become DB_ERROR. + * structured NO_INDEX/NOT_FOUND envelopes; SQLite errors become DB_ERROR. 
* The inner function always returns a CallToolResult so the surface of * this helper is the same type. * diff --git a/packages/mcp/src/tools/sql.test.ts b/packages/mcp/src/tools/sql.test.ts index d919f7f9..72463802 100644 --- a/packages/mcp/src/tools/sql.test.ts +++ b/packages/mcp/src/tools/sql.test.ts @@ -382,39 +382,42 @@ test("sql: cypher timeout_ms is forwarded to store.query opts", async () => { // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- -// Schema-hint correctness (finding R2): the tool description must map SQL -// mode to the DuckDB temporal tables and Cypher mode to the lbug graph -// labels — NOT advertise `nodes`/`relations`/etc. as SQL tables. One -// structural check covers both sections so the assertions can't drift. +// Schema-hint correctness (ADR 0019): the whole index is one `store.sqlite`, +// so `nodes` / `edges` / `embeddings` ARE directly SQL-queryable. The tool +// description must advertise them as SQL tables under `sql:` and mark +// `cypher:` as the community-fork-only escape hatch. (This inverts the prior +// finding-R2 contract, which assumed a Cypher-only lbug graph tier.) // --------------------------------------------------------------------------- -test("sql: tool description splits SQL temporal tables from Cypher graph labels", async () => { +test("sql: tool description advertises the graph tables as SQL-queryable and marks cypher fork-only", async () => { await withHarness({ rows: [] }, async ({ ctx, server }) => { registerSqlTool(server, ctx); // biome-ignore lint/suspicious/noExplicitAny: reach into the SDK's tool registry for the description const registered = (server as any)._registeredTools as Record; const desc = registered["sql"]?.description ?? ""; - // SQL section: the two tables that actually exist in the DuckDB - // temporal tier (per packages/storage/src/schema-ddl.ts). 
+ // SQL section: every table in the single-file store is directly queryable. assert.match(desc, /SQL mode/, "description must label a SQL-mode section"); + assert.match(desc, /\bnodes\b/, "SQL section must list the nodes table"); + assert.match(desc, /\bedges\b/, "SQL section must list the edges table"); assert.match(desc, /\bcochanges\b/, "SQL section must list cochanges"); assert.match(desc, /\bsymbol_summaries\b/, "SQL section must list symbol_summaries"); + assert.match(desc, /payload->>/, "SQL section must show the JSON1 payload extract idiom"); - // Cypher section: graph labels reached via Cypher, plus an explicit - // statement that the graph entities are NOT SQL tables. + // Cypher section: reserved for community-fork adapters; the default + // SQLite backend does not support it. assert.match(desc, /Cypher mode/, "description must label a Cypher-mode section"); assert.match( desc, - /not SQL-queryable|never `SELECT/i, - "description must state the graph is not SQL-queryable", + /community-fork|use `sql:` instead/i, + "description must mark cypher as the community-fork-only path", ); - // The bug being fixed: the old hint advertised `nodes(...)` as a SQL - // table via a "Tables: nodes(" preamble. That phrasing must be gone. + // The inverted bug: the description must NOT claim the graph is + // unqueryable by SQL — that was true only under the old lbug backend. assert.ok( - !desc.includes("Tables: nodes("), - "description must NOT advertise `nodes` as a SQL table (finding R2)", + !/not SQL-queryable/i.test(desc), + "description must NOT claim the graph is non-SQL-queryable (ADR 0019)", ); }); }); diff --git a/packages/mcp/src/tools/sql.ts b/packages/mcp/src/tools/sql.ts index bebc8551..ecab7455 100644 --- a/packages/mcp/src/tools/sql.ts +++ b/packages/mcp/src/tools/sql.ts @@ -1,25 +1,30 @@ /** - * `sql` — raw read-only SQL / Cypher over the local graph store. + * `sql` — raw read-only SQL over the local single-file store. 
* - * The tool accepts either `sql` (temporal DuckDB view) or `cypher` - * (graph lbug view) — exactly one per call. The read-only guards - * (`assertReadOnlySql` / `assertReadOnlyCypher`) reject any write verb - * before the statement reaches the underlying engine. + * Post-ADR 0019 the whole index is one `store.sqlite` (node:sqlite, WAL), + * so `nodes`, `edges`, `embeddings`, `store_meta`, `cochanges`, and + * `symbol_summaries` are all real SQL tables in the same file. The `sql` + * arg runs read-only SQL over them via `store.temporal.exec()`. The + * read-only guard (`assertReadOnlySql`) rejects any write verb before the + * statement reaches the engine. + * + * The `cypher` arg is retained on the input surface for the community-fork + * escape hatch (an AGE / Memgraph / Neo4j adapter that implements the + * optional `execCypher` hatch). The in-tree `SqliteStore` does NOT + * implement `execCypher` — a `cypher:` call against the default backend + * returns a clear "use `sql:` instead" envelope. * * - SQL path: `SqlGuardError` on violation → INVALID_INPUT envelope. * - Cypher path: `CypherGuardError` on violation → INVALID_INPUT envelope. * - Both `sql` and `cypher` supplied → INVALID_INPUT "choose one". * - * A default 5 s timeout caps runaway queries (DuckDB itself has no SQL - * timeout — the adapter interrupts via a JS timer; the graph adapter - * honours `timeoutMs` through its pool). + * A default 5 s timeout caps runaway queries — the adapter interrupts via + * SQLite's `busy_timeout` PRAGMA plus a JS timer. * - * The tool description embeds a two-section schema hint so agents author - * correct queries without a separate schema probe: the SQL section lists - * the DuckDB temporal tables (`cochanges`, `symbol_summaries`); the Cypher - * section lists the lbug graph's node labels and relationship types. The - * code graph (`nodes`/`relations`/`embeddings`/`store_meta`) is reachable - * only through Cypher, not via SQL `FROM` — see ADR 0016. 
+ * The tool description embeds a schema hint so agents author correct + * queries without a separate schema probe: it lists every SQL table in + * `store.sqlite` and the JSON1 `payload->>'$.field'` extract idiom for + * kind-specific node fields. */ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; @@ -44,14 +49,14 @@ const SqlInput = { .min(1) .optional() .describe( - "Read-only SQL statement against the temporal DuckDB view. INSERT/UPDATE/DELETE/DDL are rejected by the guard. Provide exactly one of `sql` or `cypher`.", + "Read-only SQL statement against the single-file `store.sqlite` index — query `nodes`, `edges`, `embeddings`, `store_meta`, `cochanges`, or `symbol_summaries` directly. INSERT/UPDATE/DELETE/DDL are rejected by the guard. Provide exactly one of `sql` or `cypher`.", ), cypher: z .string() .min(1) .optional() .describe( - "Read-only Cypher statement against the graph view (lbug). CREATE/DELETE/SET/MERGE/REMOVE/DROP are rejected by the guard. Provide exactly one of `sql` or `cypher`.", + "Read-only Cypher statement, for a community-fork graph adapter that implements the optional `execCypher` hatch. The default SQLite backend does NOT support Cypher — use `sql:` instead. Provide exactly one of `sql` or `cypher`.", ), ...repoArgShape, timeout_ms: z @@ -64,12 +69,17 @@ const SqlInput = { }; const SCHEMA_HINT = [ - "SQL mode (`sql:`, DuckDB temporal tier) — only these tables exist:", + "SQL mode (`sql:`) — the whole index is one `store.sqlite`; these tables are all directly SQL-queryable:", + " nodes(id, kind, name, file_path, start_line, end_line, payload) -- payload is canonical JSON; reach kind-specific fields via JSON1, e.g. 
payload->>'$.severity'", + " edges(id, src, dst, type, confidence, step, reason) -- the call/reference graph; join src/dst back to nodes.id", + " embeddings(node_id, granularity, chunk_index, dim, vector, content_hash)", " cochanges(source_file, target_file, cocommit_count, total_commits_source, total_commits_target, last_cocommit_at, lift)", " symbol_summaries(node_id, content_hash, prompt_version, model_id, summary_text, signature_summary, returns_type_summary, created_at)", - "Cypher mode (`cypher:`, lbug graph tier) — query the graph by node label / relationship type. The code graph is NOT SQL-queryable: `nodes`, `relations`, `embeddings`, and `store_meta` are graph entities reachable only through Cypher (e.g. `MATCH (n:CodeNode) RETURN n`), never `SELECT ... FROM nodes`.", - ` Node labels: ${NODE_KINDS.join(", ")}.`, - ` Relationship types: ${RELATION_TYPES.join(", ")}.`, + " store_meta(id, schema_version, indexed_at, node_count, edge_count, ...)", + ` nodes.kind values: ${NODE_KINDS.join(", ")}.`, + ` edges.type values: ${RELATION_TYPES.join(", ")}.`, + " Example: SELECT id, name FROM nodes WHERE kind = 'Function' AND file_path LIKE 'src/auth/%';", + "Cypher mode (`cypher:`) — only for a community-fork graph adapter with the optional execCypher hatch. The default SQLite backend rejects it; use `sql:` instead.", ].join("\n"); interface SqlArgs { @@ -91,7 +101,7 @@ export async function runSql(ctx: ToolContext, args: SqlArgs): Promise { /** * Dedupe by the caller-provided id extractor, keeping the LAST occurrence. * - * Protects against DuckDB UPSERT issue 8147 (two rows with the same primary + * Originally guarded against DuckDB UPSERT issue 8147 (two rows with the same primary * key in one INSERT cannot both fire ON CONFLICT). The caller-driven id * function also lets us reuse this for nodes (id) and edges (id).
*/ @@ -329,7 +333,7 @@ export function booleanOrNull(v: unknown): boolean | null { * * **Preserve `[]` distinct from absent.** Returning a typed `[]` on an * empty-array input (rather than `null`) carries the "explicit empty" - * signal into each adapter's writer. DuckDB `TEXT[]` stores a 0-length + * signal into each adapter's writer. DuckDB `TEXT[]` (legacy backend) stores a 0-length * literal natively; lbug `STRING[]` cannot (it collapses `[]` to NULL on * write), so the graph-db adapter substitutes an empty-array marker on the * way in and decodes it back on the way out — see `encodeNodeCol` + @@ -486,10 +490,10 @@ export function frameworksJsonOrNull(flat: unknown, detected: unknown): string | // --------------------------------------------------------------------------- /** - * Step-zero sentinel. The DuckDB `relations.step` column is + * Step-zero sentinel. The SQLite `relations.step` column is * `INTEGER NOT NULL DEFAULT 0`; the graph-db column is nullable `INT32`. * Both backends therefore disagree on read-back when the source edge - * carries an explicit `step: 0` (DuckDB returns `0`, graph-db returns + * carries an explicit `step: 0` (SQLite returns `0`, graph-db returns * `null`). The convention is "drop step when it reads back as zero/null" * — this helper formalises that on the read side so canonical-JSON parity * holds across backends. diff --git a/packages/storage/src/index.ts b/packages/storage/src/index.ts index 00caab5d..411d8301 100644 --- a/packages/storage/src/index.ts +++ b/packages/storage/src/index.ts @@ -59,7 +59,7 @@ import { SqliteStore, type SqliteStoreOptions } from "./sqlite-adapter.js"; /** * Combined options accepted by {@link openStore}. Superset of the spec-level * {@link ApiOpenStoreOptions} that adds the SQLite-adapter tuning bag.
The - * single-file store replaced the lbug + DuckDB pair (ADR 0017), so the former + * single-file store replaced the lbug + DuckDB pair (ADR 0019), so the former * `duckOptions` / `graphDbOptions` per-backend bags are gone. */ export interface OpenStoreOptions extends ApiOpenStoreOptions { diff --git a/packages/storage/src/interface.ts b/packages/storage/src/interface.ts index bd327579..ec56d8a3 100644 --- a/packages/storage/src/interface.ts +++ b/packages/storage/src/interface.ts @@ -10,16 +10,18 @@ * adapters target. * 2. {@link ITemporalStore} — tabular-tier, SQL-only operations: * cochanges, symbol summaries, the `codehub query --sql` escape hatch, - * and any future temporal-analytics query. Today always DuckDB-backed. - * Community adapters can implement other SQL-shaped stores (SQLite, - * Postgres) without affecting graph adapters. + * and any future temporal-analytics query. Community adapters can + * implement other SQL-shaped stores (Postgres, …) without affecting + * graph adapters. * * Callers that need both surfaces use {@link openStore} and consume the * resulting {@link OpenStoreResult} `{graph, temporal, close, ...}`. * - * The graph-db adapter (via `@ladybugdb/core`) is graph-only and pairs - * with a DuckDB temporal store. The DuckDB adapter is temporal-only — - * cochanges, symbol summaries, and the `--sql` escape hatch. + * Post-ADR 0019 one `SqliteStore` implements BOTH interfaces over a single + * `store.sqlite` file, and `openStore` returns that one instance as both + * the `graph` and `temporal` views. The split is retained as a contract so + * a community fork can still pair a graph-only adapter (AGE / Memgraph / + * Neo4j / Neptune) with a separate SQL-shaped temporal store. * * ## Sentinel rules * @@ -31,10 +33,10 @@ * * 1. **Step-zero drop** ({@link stepZeroSentinel}). The canonical edge * shape distinguishes "no step" (field absent) from "step is N ≥ 1". 
- * DuckDB stores `relations.step` as `INTEGER NOT NULL DEFAULT 0`; the + * SQLite stores `relations.step` as `INTEGER NOT NULL DEFAULT 0`; the * graph-db backend stores the column as nullable `INT32`. Both * backends therefore disagree on read-back when the source edge - * carries an explicit `step: 0` (DuckDB returns `0`, graph-db + * carries an explicit `step: 0` (SQLite returns `0`, graph-db * returns `null`). The convention is "drop step when it reads back * as 0/null", which is what `stepZeroSentinel` enforces. * @@ -97,7 +99,7 @@ export type GraphDialect = "cypher"; * * Community adapters (AGE / Memgraph / Neo4j / Neptune) implement THIS * interface only. They pair with an {@link ITemporalStore} (always - * DuckDB-backed by default) for tabular concerns. + * SQLite-backed by default) for tabular concerns. * * ## v1.0 conformance contract * @@ -383,11 +385,12 @@ export interface IGraphStore { /** * Tabular/temporal interface. Cochanges, symbol summaries, time-travel * queries, and the `codehub query --sql` escape hatch all live here. - * Today always DuckDB-backed; future SQLite or Parquet-sidecar adapters - * fit the same surface. + * Post-ADR 0019 the in-tree `SqliteStore` implements this alongside + * {@link IGraphStore} over one `store.sqlite`; a community fork can back it + * with any other SQL-shaped store (Postgres, …) on the same surface. * * Graph-only community backends (AGE / Memgraph / Neo4j / Neptune) - * NEVER implement this interface — they pair with a DuckDB-backed + * NEVER implement this interface — they pair with a SQL-shaped * temporal store via {@link openStore}. */ export interface ITemporalStore { @@ -465,21 +468,22 @@ export interface ITemporalStore { // ───────────────────────────────────────────────────────────────────────────── /** - * Composed result of {@link openStore}. 
The caller closes both views via - * the deterministic {@link OpenStoreResult.close} method (graph closes - * first, then temporal — graph adapters tend to hold native pool - * handles that benefit from prompt release). + * Composed result of {@link openStore}. The caller closes the store via the + * idempotent {@link OpenStoreResult.close} method. Post-ADR 0019 `graph` and + * `temporal` are one `SqliteStore` over a single file, so close releases one + * handle; a community fork that splits the two adapters closes graph first, + * then temporal. */ export interface OpenStoreResult { - /** Graph-tier view (always lbug). */ + /** Graph-tier view. Post-ADR 0019 this is the shared `SqliteStore`. */ readonly graph: IGraphStore; - /** Tabular-tier view (always DuckDB). */ + /** Tabular-tier view — the SAME `SqliteStore` instance as `graph`. */ readonly temporal: ITemporalStore; - /** Absolute path to the on-disk graph artifact (`graph.lbug`). */ + /** Absolute path to the on-disk index (`store.sqlite`). */ readonly graphFile: string; - /** Absolute path to the on-disk temporal artifact (`temporal.duckdb`). */ + /** Same single-file path as `graphFile` (both views share one file). */ readonly temporalFile: string; - /** Closes both views in deterministic order. Idempotent. */ + /** Closes the shared store handle. Idempotent. */ close(): Promise; } @@ -590,7 +594,7 @@ export interface SymbolSummaryRow { // Shared options + result types // ───────────────────────────────────────────────────────────────────────────── -/** JS types that can safely round-trip as DuckDB query parameters at MVP. */ +/** JS types that can safely round-trip as SQLite query parameters at MVP. */ export type SqlParam = string | number | bigint | boolean | null; /** @@ -833,7 +837,7 @@ export interface EmbeddingRow { /** * Tier the row belongs to. 
Optional on the TypeScript interface so legacy * callers that build rows without explicitly setting it still compile; the - * DuckDB DDL defaults NULL inputs to `'symbol'` so the on-disk row always + * SQLite DDL defaults NULL inputs to `'symbol'` so the on-disk row always * carries a value. Writers produced by P03 always set this explicitly. */ readonly granularity?: EmbeddingGranularity; diff --git a/packages/storage/src/license.ts b/packages/storage/src/license.ts index 201d061c..39a57f4b 100644 --- a/packages/storage/src/license.ts +++ b/packages/storage/src/license.ts @@ -4,7 +4,7 @@ * Extracted out of `duckdb-adapter.ts` so consumers (the single-file * `SqliteStore`, `listDependencies`, the license-audit surface) can use it * WITHOUT transitively importing `@duckdb/node-api`. That top-level native - * import is exactly what would defeat the lazy-DuckDB contract — importing a + * import is exactly what would defeat the lazy-DuckDB (native-binding) contract — importing a * pure helper must never load a native binding. */ diff --git a/packages/storage/src/meta.ts b/packages/storage/src/meta.ts index 915b3258..3e779d3a 100644 --- a/packages/storage/src/meta.ts +++ b/packages/storage/src/meta.ts @@ -1,9 +1,9 @@ /** * Sidecar metadata reader / writer for `/.codehub/meta.json`. * - * The DuckDB database stores the same information in its `store_meta` table, - * but the sidecar is plain JSON so tools outside the OpenCodeHub runtime (e.g. - * CI staleness probes) can read it without linking libduckdb. + * The `store.sqlite` index stores the same information in its `store_meta` + * table, but the sidecar is plain JSON so tools outside the OpenCodeHub + * runtime (e.g. CI staleness probes) can read it without opening the store. * * Writes are atomic: the payload is written to a temp file in the target * directory and renamed over the destination.
`fs.rename` is atomic on POSIX diff --git a/packages/storage/src/paths.test.ts b/packages/storage/src/paths.test.ts index 5da6c500..e86ca6c7 100644 --- a/packages/storage/src/paths.test.ts +++ b/packages/storage/src/paths.test.ts @@ -18,7 +18,7 @@ test("resolveRepoMetaDir: joins repo path with .codehub", () => { assert.equal(actual, resolve("/tmp/demo-repo", META_DIR_NAME)); }); -test("resolveGraphPath: drops the lbug graph file inside the meta dir", () => { +test("resolveGraphPath: drops the store.sqlite file inside the meta dir", () => { const actual = resolveGraphPath("/tmp/demo-repo"); assert.equal(actual, resolve("/tmp/demo-repo", META_DIR_NAME, describeArtifacts().graphFile)); }); @@ -48,9 +48,9 @@ test("resolveRepoMetaDir: resolves relative paths", () => { assert.equal(actual, resolve(process.cwd(), "demo-repo", META_DIR_NAME)); }); -test("describeArtifacts: returns lbug + duckdb temporal pair", () => { +test("describeArtifacts: returns the single store.sqlite for both views (ADR 0019)", () => { const actual = describeArtifacts(); - assert.equal(actual.graphFile, "graph.lbug"); - assert.equal(actual.temporalFile, "temporal.duckdb"); + assert.equal(actual.graphFile, "store.sqlite"); + assert.equal(actual.temporalFile, "store.sqlite"); assert.equal(actual.schemaName, "main"); }); diff --git a/packages/storage/src/paths.ts b/packages/storage/src/paths.ts index ee15b076..5de0d41c 100644 --- a/packages/storage/src/paths.ts +++ b/packages/storage/src/paths.ts @@ -3,9 +3,9 @@ * * These helpers are pure — they never touch the filesystem — so they are * trivially testable. Resolution rules: - * - Per-repo: `/.codehub/` holds `graph.lbug` (graph artifact) - * and `temporal.duckdb` (cochange + symbol-summary sidecar) plus the - * meta sidecar `meta.json`. + * - Per-repo: `/.codehub/` holds the single `store.sqlite` index + * (graph nodes/edges, embeddings, and the temporal cochange + + * symbol-summary tables — ADR 0019) plus the meta sidecar `meta.json`. 
* - Global : `~/.codehub/registry.json` holds the cross-repo registry. */ @@ -17,24 +17,26 @@ export const META_FILE_NAME = "meta.json"; export const REGISTRY_FILE_NAME = "registry.json"; /** - * Canonical artifact filenames. Used by: + * Canonical artifact filename. Used by: * - * - The `openStore` factory to construct the graph + temporal file - * paths from a single `/.codehub/` parent. * - The `codehub list` indexed-status probe to decide whether a repo * has any artifact on disk. - * - The MCP error envelope to enumerate candidate paths in the + * - The MCP error envelope to name the candidate path in the * "store unreadable" message. * - * `schemaName` is the namespace used inside the graph artifact when the - * backend supports schemas; lbug emits into the default `main` schema. + * Post-ADR 0019 the entire index is one `/.codehub/store.sqlite` + * file (node:sqlite, WAL) — there is no separate graph / temporal file. + * `graphFile` and `temporalFile` both resolve to that single store so the + * historical two-field shape keeps callers (and the conformance harness) + * compiling without a churned signature. `schemaName` stays `main` — the + * default SQLite schema the tables live in. */ export function describeArtifacts(): { readonly graphFile: string; readonly temporalFile: string; readonly schemaName: string; } { - return { graphFile: "graph.lbug", temporalFile: "temporal.duckdb", schemaName: "main" }; + return { graphFile: "store.sqlite", temporalFile: "store.sqlite", schemaName: "main" }; } /** Resolve the `/.codehub` directory (repo path may be relative). */ @@ -43,9 +45,9 @@ export function resolveRepoMetaDir(repoPath: string): string { } /** - * Resolve the canonical graph artifact path - * (`/.codehub/graph.lbug`). The {@link openStore} factory derives - * the sibling temporal artifact path automatically. + * Resolve the canonical store path (`/.codehub/store.sqlite`). 
+ * Post-ADR 0019 this single file is the whole index; {@link openStore} + * takes this path and serves both the graph and temporal views from it. */ export function resolveGraphPath(repoPath: string): string { return resolve(repoPath, META_DIR_NAME, describeArtifacts().graphFile); diff --git a/packages/storage/src/schema-ddl.ts b/packages/storage/src/schema-ddl.ts index 082a2b69..2122a51c 100644 --- a/packages/storage/src/schema-ddl.ts +++ b/packages/storage/src/schema-ddl.ts @@ -1,5 +1,5 @@ /** - * DDL emitter for the DuckDB-backed temporal store. + * DDL emitter for the SQLite-backed temporal store. * * Two tables only: * - `cochanges` — file-level association statistics from git history. diff --git a/packages/storage/src/sql-guard.ts b/packages/storage/src/sql-guard.ts index f5f577eb..d4bf4963 100644 --- a/packages/storage/src/sql-guard.ts +++ b/packages/storage/src/sql-guard.ts @@ -1,11 +1,11 @@ /** * Lightweight read-only SQL guard. * - * The primary safety mechanism for user-supplied queries is opening the DuckDB + * The primary safety mechanism for user-supplied queries is opening the SQLite * connection in `READ_ONLY` access mode — the engine itself rejects mutating * statements with a clear error. This guard is belt-and-braces: it catches * obviously-bad inputs before they hit the engine, and blocks extension / - * configuration commands (INSTALL / LOAD / ATTACH / PRAGMA) that DuckDB does + * configuration commands (ATTACH / PRAGMA in SQLite; INSTALL / LOAD are DuckDB-era holdovers) that an engine may * permit in read-only mode and that would let a caller reach outside the * sandbox. */ @@ -19,7 +19,7 @@ export class SqlGuardError extends Error { /** * Tokens that must never appear as a statement-leading keyword. The list covers - * every DDL/DML verb DuckDB knows about, plus ATTACH/COPY/INSTALL/LOAD which + * every DDL/DML verb SQLite knows about, plus ATTACH and the DuckDB-era COPY/INSTALL/LOAD which * could exfiltrate data or load arbitrary code even on a read-only connection.
* PRAGMA is also blocked — the one read-only PRAGMA a user might want * (`EXPLAIN`) is reachable via the `EXPLAIN` keyword directly. @@ -62,7 +62,7 @@ const ALLOWED_LEADING_KEYWORDS: ReadonlySet = new Set([ "SHOW", "SUMMARIZE", "VALUES", - "FROM", // DuckDB FROM-first SELECT shorthand. + "FROM", // FROM-first SELECT shorthand (tolerated; harmless under node:sqlite). "TABLE", // shorthand for SELECT * FROM table. ]); @@ -130,7 +130,7 @@ function hasNonWhitespace(s: string): boolean { /** * Reject any SQL that is not a single read-only statement. Call this before - * handing `sql` to DuckDB on a read-only connection. + * handing `sql` to SQLite on a read-only connection. */ export function assertReadOnlySql(sql: string): void { if (typeof sql !== "string" || sql.trim().length === 0) { diff --git a/packages/storage/src/sqlite-adapter.ts b/packages/storage/src/sqlite-adapter.ts index 7a09d59e..c144a814 100644 --- a/packages/storage/src/sqlite-adapter.ts +++ b/packages/storage/src/sqlite-adapter.ts @@ -1,18 +1,19 @@ /** - * SqliteStore — single-file storage adapter (branch `spike/sqlite-single-file`). + * SqliteStore — single-file storage adapter (ADR 0019). * - * THESIS. One `*.sqlite` file in WAL mode backs EVERYTHING: graph nodes, + * THESIS. One `store.sqlite` file in WAL mode backs EVERYTHING: graph nodes, * edges, embeddings, and the temporal/non-graph tables (cochanges, symbol - * summaries) that today live in two native-binding engines - * (`graph.lbug` via @ladybugdb/core + `temporal.duckdb` via @duckdb/node-api). - * Collapsing both onto Node 24's built-in `node:sqlite` removes the last two - * native dependencies, which is what unlocks the real goal: a zero-dep, - * one-command, no-Docker install (`npm i -g @opencodehub/cli` and nothing else). + * summaries). It replaced the two native-binding engines this project used + * before — `graph.lbug` via @ladybugdb/core + `temporal.duckdb` via + * @duckdb/node-api. 
Collapsing both onto Node 24's built-in `node:sqlite` + * removed the last two native storage dependencies, which is what unlocked + * the real goal: a zero-dep, one-command, no-Docker install + * (`npm i -g @opencodehub/cli` and nothing else). * * STATUS. This file implements the FULL {@link IGraphStore} + * {@link ITemporalStore} surface against a single file. Embeddings live in - * the `embeddings` table inside store.sqlite; there is no DuckDB dependency - * and no Parquet export (ADR 0019 dropped the write-only sidecar). + * the `embeddings` table inside store.sqlite; there is no native storage + * binding and no Parquet export (ADR 0019 dropped the write-only sidecar). * * GRAPH-HASH PARITY. The hard success criterion is that a `KnowledgeGraph` * rebuilt from `listNodes({})` + `listEdges({})` produces a byte-identical