GraphDone · mvalancy · Jun 14, 2026 · Jun 14, 2026
diff --git a/tests/e2e/visual-vlm.spec.ts b/tests/e2e/visual-vlm.spec.ts
@@ -3,6 +3,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { login, TEST_USERS } from '../helpers/auth';
 import { seedLargeGraph, deleteGraphDeep } from '../helpers/seedGraph';
+import { sweepTestData, TEST_GRAPH_PREFIX } from '../helpers/dbHealing';
 import '../helpers/testEnv';
 import { isVlmAvailable, evaluateBatch, PERSONAS, personaByKey } from '../helpers/vlm';
 
@@ -31,6 +32,11 @@ async function shot(page: Page, name: string): Promise<string> {
   return file;
 }
 
+// Self-heal: sweep leftover test graphs + orphans before and after, so an interrupted run never leaves the
+// dev DB dirty (which can break THE GATE). NOTE: the sweep is DB-wide and name-based — it also removes test graphs another suite is still using.
+test.beforeAll(async () => { await sweepTestData('vlm:before'); });
+test.afterAll(async () => { await sweepTestData('vlm:after'); });
+
 test('VLM visual evaluation across personas @vlm', async ({ page }) => {
   test.setTimeout(900_000);
   const available = await isVlmAvailable();
@@ -44,14 +50,14 @@ test('VLM visual evaluation across personas @vlm', async ({ page }) => {
   await page.waitForTimeout(1500);
 
   // 1. Empty graph — first-run invitation (new-user + visual defects).
-  const empty = await page.evaluate(async () => {
+  const empty = await page.evaluate(async (pfx) => {
     const token = localStorage.getItem('authToken') ?? '';
     const post = (query: string, variables?: unknown) =>
       fetch('/api/graphql', { method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` }, body: JSON.stringify({ query, variables }) }).then((r) => r.json());
     const me = await post('{ me { id } }');
-    const g = await post(`mutation($i:[GraphCreateInput!]!){createGraphs(input:$i){graphs{id}}}`, { i: [{ name: `VLM Empty ${Date.now()}`, type: 'PROJECT', status: 'ACTIVE', createdBy: me.data.me.id, isShared: true }] });
+    const g = await post(`mutation($i:[GraphCreateInput!]!){createGraphs(input:$i){graphs{id}}}`, { i: [{ name: `${pfx} VLM Empty ${Date.now()}`, type: 'PROJECT', status: 'ACTIVE', createdBy: me.data.me.id, isShared: true }] });
     return g.data.createGraphs.graphs[0].id as string;
-  });
+  }, TEST_GRAPH_PREFIX);
   cleanup.push(empty);
   await page.setViewportSize({ width: 1440, height: 900 });
   await page.evaluate((id) => localStorage.setItem('currentGraphId', id), empty);

diff --git a/tests/helpers/dbHealing.ts b/tests/helpers/dbHealing.ts
@@ -0,0 +1,106 @@
+import neo4j, { Driver } from 'neo4j-driver';
+
+/**
+ * Self-healing for the dev Neo4j so test runs never leave the database dirty —
+ * even when a run is killed mid-flight (timeout, Ctrl-C) and its per-test
+ * cleanup never executes.
+ *
+ * Heavy suites (scale-sweep, visual-vlm) call sweepTestData() in beforeAll
+ * (heal leftovers from a previous interrupted run) AND afterAll (clean up this
+ * run). It removes:
+ *   - graphs whose name carries the test sentinel (or a legacy test prefix),
+ *     with their WorkItems and Edge nodes,
+ *   - orphan WorkItems (no BELONGS_TO) — what a half-finished delete leaves,
+ *   - orphan Edge nodes (missing a source or target) — these 500 the edges
+ *     query, the original data-integrity incident.
+ *
+ * It NEVER touches seed/demo graphs (Welcome, Cycle 2, Aquarium, …) — only
+ * sentinel/test-named graphs and true orphans. Fully graceful: if Neo4j is
+ * unreachable it logs and returns zeros rather than failing the run.
+ */
+
+/** Every test-seeded graph name starts with this so the sweep can find them
+ * unambiguously without ever matching a real graph. */
+export const TEST_GRAPH_PREFIX = '[E2E]';
+
+// Sentinel plus legacy test-name prefixes (graphs created before the sentinel, or by ad-hoc probes).
+// Anchored at the start only: ANY graph whose name begins with one of these (e.g. "Pop…") is swept.
+const LEGACY_TEST_NAME_REGEX =
+  '^(\\[E2E\\]|Scale |VLM |Clone|Parity|PathP|NodeAttach|Contract|CloneFix|CloneProbe|Pop|TP |Empty Smoke|Living E2E|ParityV|Smoke ).*';
+
+const URI = process.env.NEO4J_URI || 'bolt://localhost:7687'; // Neo4j connection: env vars override these fallbacks (|| also replaces an empty value)
+const USER = process.env.NEO4J_USER || 'neo4j';
+const PASS = process.env.NEO4J_PASSWORD || 'graphdone_password';
+
+export interface SweepResult { // Outcome of one sweepTestData() call.
+  testGraphs: number; // test-named Graph nodes deleted
+  testGraphNodes: number; // rows deleted under those graphs; counted per (WorkItem, Edge) match, so it can exceed the distinct WorkItems
+  orphanNodes: number; // deleted WorkItems that had no BELONGS_TO relationship to a Graph
+  orphanEdges: number; // deleted Edge nodes lacking an EDGE_SOURCE or EDGE_TARGET WorkItem
+  ok: boolean; // true only when every sweep step completed; false if Neo4j was unreachable or a step threw
+}
+
+async function deleteInBatches(session: any, matchDelete: string): Promise<number> {
+  // Re-runs matchDelete (shape: MATCH ... WITH x LIMIT 5000 DETACH DELETE x RETURN count(x) AS c) until a
+  // batch reports 0; returns the sum. `c` may be a neo4j Integer, a number or a bigint, depending on driver config.
+  let total = 0;
+  for (;;) {
+    const raw = (await session.run(matchDelete)).records[0]?.get('c');
+    const c = Number(typeof raw?.toNumber === 'function' ? raw.toNumber() : raw ?? 0);
+    if (!(c > 0)) return total; // 0, missing or non-numeric count => nothing left to delete
+    total += c;
+  }
+}
+
+/** Deletes test-named graphs (with their WorkItems/Edge nodes) plus orphan WorkItems/Edges; errors are logged and leave `ok` false. */
+export async function sweepTestData(label = ''): Promise<SweepResult> {
+  const result: SweepResult = { testGraphs: 0, testGraphNodes: 0, orphanNodes: 0, orphanEdges: 0, ok: false };
+  let driver: Driver | undefined;
+  try {
+    driver = neo4j.driver(URI, neo4j.auth.basic(USER, PASS));
+    await driver.verifyConnectivity();
+    const session = driver.session();
+    // Bind the name regex as $re instead of splicing it into the Cypher text, so its backslashes are
+    // never reinterpreted as Cypher string-literal escapes. `scoped` is a run()-only session adapter.
+    const params = { re: LEGACY_TEST_NAME_REGEX };
+    const scoped = { run: (query: string) => session.run(query, params) };
+    try {
+      // 1) WorkItems + Edge nodes that belong to test-named graphs.
+      result.testGraphNodes = await deleteInBatches(
+        scoped,
+        `MATCH (g:Graph) WHERE g.name =~ $re
+         MATCH (g)<-[:BELONGS_TO]-(w:WorkItem)
+         OPTIONAL MATCH (w)<-[:EDGE_SOURCE|EDGE_TARGET]-(e:Edge)
+         WITH w, e LIMIT 5000 DETACH DELETE e, w RETURN count(w) AS c`
+      );
+      // 2) The test-named graphs themselves.
+      const g = await session.run(`MATCH (g:Graph) WHERE g.name =~ $re DETACH DELETE g RETURN count(g) AS c`, params);
+      result.testGraphs = neo4j.integer.toNumber(g.records[0]?.get('c') ?? 0); // Integer, number or bigint depending on driver config
+      // 3) Orphan WorkItems (belong to no graph) — what a killed delete leaves.
+      result.orphanNodes = await deleteInBatches(
+        session,
+        `MATCH (w:WorkItem) WHERE NOT (w)-[:BELONGS_TO]->(:Graph) WITH w LIMIT 5000 DETACH DELETE w RETURN count(w) AS c`
+      );
+      // 4) Orphan Edge nodes (missing a source or target) — these break the edges query for everyone.
+      result.orphanEdges = await deleteInBatches(
+        session,
+        `MATCH (e:Edge) WHERE NOT (e)-[:EDGE_SOURCE]->(:WorkItem) OR NOT (e)-[:EDGE_TARGET]->(:WorkItem) WITH e LIMIT 5000 DETACH DELETE e RETURN count(e) AS c`
+      );
+      result.ok = true;
+      const touched = result.testGraphs + result.testGraphNodes + result.orphanNodes + result.orphanEdges;
+      if (touched > 0) {
+        // eslint-disable-next-line no-console
+        console.log(`[db-heal${label ? ' ' + label : ''}] swept ${result.testGraphs} test graphs, ${result.testGraphNodes} their nodes, ${result.orphanNodes} orphan nodes, ${result.orphanEdges} orphan edges`);
+      }
+    } finally {
+      await session.close();
+    }
+  } catch (err) {
+    // Graceful: never fail the test run because healing couldn't connect or a sweep query failed.
+    // eslint-disable-next-line no-console
+    console.warn(`[db-heal] skipped (${err instanceof Error ? err.message.split('\n')[0] : String(err)})`);
+  } finally {
+    await driver?.close();
+  }
+  return result;
+}
diff --git a/tests/helpers/seedGraph.ts b/tests/helpers/seedGraph.ts
@@ -1,4 +1,5 @@
 import { Page } from '@playwright/test';
+import { TEST_GRAPH_PREFIX } from './dbHealing';
 
 /**
  * Seeds realistically-shaped graphs of arbitrary size through the real GraphQL
@@ -64,7 +65,7 @@ export async function seedLargeGraph(page: Page, opts: SeedOptions): Promise<See
   const g = await gql(
     page,
     `mutation($input: [GraphCreateInput!]!) { createGraphs(input: $input) { graphs { id } } }`,
-    { input: [{ name: `${namePrefix} ${size}n ${Date.now()}`, type: 'PROJECT', status: 'ACTIVE', createdBy: userId, isShared: true }] }
+    { input: [{ name: `${TEST_GRAPH_PREFIX} ${namePrefix} ${size}n ${Date.now()}`, type: 'PROJECT', status: 'ACTIVE', createdBy: userId, isShared: true }] }
   );
   const graphId = g.createGraphs.graphs[0].id as string;
 

diff --git a/tests/perf/scale-sweep.spec.ts b/tests/perf/scale-sweep.spec.ts
@@ -3,6 +3,7 @@ import * as fs from 'fs';
 import * as path from 'path';
 import { login, TEST_USERS } from '../helpers/auth';
 import { seedLargeGraph, deleteGraphDeep } from '../helpers/seedGraph';
+import { sweepTestData } from '../helpers/dbHealing';
 import '../helpers/testEnv';
 import { envIntList, envList } from '../helpers/testEnv';
 
@@ -193,6 +194,12 @@ async function measure(page: Page, graphId: string, size: number, quality: strin
 test.describe('large-scale graph perf sweep @scale', () => {
   test.describe.configure({ mode: 'serial', timeout: 600_000 });
 
+  // Self-heal: clear leftover test graphs + orphans from any prior interrupted
+  // run before starting, and clean up this run afterward even if a per-run
+  // delete was skipped (e.g. a killed run).
+  test.beforeAll(async () => { await sweepTestData('scale:before'); });
+  test.afterAll(async () => { await sweepTestData('scale:after'); });
+
   for (const size of SIZES) {
     test(`sweep ${size} nodes`, async ({ page }) => {
       await login(page, TEST_USERS.ADMIN);