From 60add5dc8b1095d8fbbc4449f90301531b3fa9c4 Mon Sep 17 00:00:00 2001
From: Laith Al-Saadoon <alsaadoonlaith@gmail.com>
Date: Mon, 22 Jun 2026 21:38:57 +0000
Subject: [PATCH] =?UTF-8?q?feat(ingestion):=20business-logic=20phase=20?=
 =?UTF-8?q?=E2=80=94=20write=20likely=5Fplumbing=20+=20candidate=5Fbusines?=
 =?UTF-8?q?s=20into=20the=20graph?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wires the merged @opencodehub/analysis sieve kernels into `codehub analyze`.
A new `businessLogicPhase` (after `complexity`) slices each Function / Method /
Constructor / Class / Interface / Struct body, computes the deterministic
PlumbingFeatures vector, runs classifyPlumbing + classifyBusinessCandidate, and
tags the node with `likelyPlumbing` + `candidateBusiness`. The tags land in
`nodes.payload` (queryable via `payload->>'$.candidateBusiness'`), so the user
gets both concern tags from `codehub analyze` with no query, no labels, no
embeddings.

Components:
- core-types: two optional `CallableShape` fields (likelyPlumbing /
  candidateBusiness). Auto-persist through nodes.payload; no adapter change.
- extract/business-logic-features.ts: faithful Python→TS port of the feature
  extractor (computePlumbingFeatures), reproducing the marker logic — word-
  boundary / camelCase-component matching, the exact n_plumbing_signals formula
  (serialization + observability + getter/setter + dto-mapper-ratio≥0.5), and
  the ORM-base class-head detection. 44 unit tests.
- pipeline/phases/business-logic.ts: the analyze-time phase. Python/Java/Go
  only (the sieve's validated set); other languages skip silently. Class-head
  slice scans upward over a Javadoc block to reach the real `@Entity` /
  `@MappedSuperclass` annotation while excluding `@author`-style comment tags.
- default-set: registered after complexity; orchestrator test updated for the
  new topological position.

PARITY GATE (the contract): the TS analyze-pass verdicts match the Python
oracle 1368/1368 = 100.0% per-symbol across all four corpus repos
(py-cosmic-ddd / py-flask / java-petclinic / go-clean), independently
re-verified — so the shipped 0.936 plumbing precision / 0.925 business recall
hold through the port. A JPA-entity divergence (Javadoc @author shadowing the
ORM annotation) was caught by the gate at 99.63% and fixed to reach 100%.

Verified: core-types/analysis/ingestion typecheck clean; ingestion 629/629,
core-types 83/83, analysis 14/14; biome + banned-strings pass.
---
 packages/core-types/src/nodes.ts              |   17 +
 .../extract/business-logic-features.test.ts   |  371 +++++
 .../src/extract/business-logic-features.ts    | 1440 +++++++++++++++++
 .../src/pipeline/orchestrator.test.ts         |    4 +
 .../src/pipeline/phases/business-logic.ts     |  298 ++++
 .../src/pipeline/phases/default-set.ts        |    8 +
 6 files changed, 2138 insertions(+)
 create mode 100644 packages/ingestion/src/extract/business-logic-features.test.ts
 create mode 100644 packages/ingestion/src/extract/business-logic-features.ts
 create mode 100644 packages/ingestion/src/pipeline/phases/business-logic.ts

diff --git a/packages/core-types/src/nodes.ts b/packages/core-types/src/nodes.ts
index 2314025b..cfb91534 100644
--- a/packages/core-types/src/nodes.ts
+++ b/packages/core-types/src/nodes.ts
@@ -138,6 +138,23 @@ interface CallableShape {
    * supplied; absent otherwise.
    */
   readonly coveredLinesJson?: string;
+  /**
+   * Advisory concern tag from the `business-logic` phase. `true` when the
+   * deterministic sieve is confident the symbol is plumbing (serialization,
+   * DTO mapping, transport, DI wiring). Absent when the language is outside
+   * the validated set (python/java/go) or the phase did not run. Measured
+   * plumbing precision ~0.94. See `@opencodehub/analysis` `classifyPlumbing`.
+   */
+  readonly likelyPlumbing?: boolean;
+  /**
+   * Advisory concern tag from the `business-logic` phase — the recall-first
+   * complement of {@link likelyPlumbing}. `true` when the symbol is a
+   * business-logic candidate (everything the sieve did NOT classify as
+   * plumbing). High recall (~0.93), low precision by design: a "look here for
+   * domain logic" hint, not a confirmed classification. Absent when the phase
+   * did not run for this symbol's language.
+   */
+  readonly candidateBusiness?: boolean;
 }
 
 interface TypeDeclShape {
diff --git a/packages/ingestion/src/extract/business-logic-features.test.ts b/packages/ingestion/src/extract/business-logic-features.test.ts
new file mode 100644
index 00000000..473afc38
--- /dev/null
+++ b/packages/ingestion/src/extract/business-logic-features.test.ts
@@ -0,0 +1,371 @@
+/**
+ * Parity tests for the business-logic feature extractor against the Python
+ * oracle `och_bizlogic_extract.py`. Each test fixes the expected feature value
+ * to the value the Python emits for the same snippet (captured by running the
+ * Python `_extract_one` on each case), then asserts the TS port reproduces it.
+ *
+ * The four fields under test are exactly the ones the merged kernel
+ * (`@opencodehub/analysis` `classifyPlumbing`) consumes, so a passing suite
+ * means the shipped sieve verdict agrees with the Python substrate.
+ */
+
+import assert from "node:assert/strict";
+import { test } from "node:test";
+import { computePlumbingFeatures } from "./business-logic-features.js";
+
+/**
+ * Python-flavoured shorthand: runs the extractor with `lang` pinned to "python".
+ * The class head is attached only when supplied, so the key stays absent
+ * rather than being present with an `undefined` value.
+ */
+function py(bodyText: string, symbolName: string, kind = "Function", classHeadText?: string) {
+  const head = classHeadText === undefined ? {} : { classHeadText };
+  return computePlumbingFeatures({ symbolName, kind, bodyText, lang: "python", ...head });
+}
+
+// ── serialization calls ─────────────────────────────────────────────────────
+
+test("serialization: json.dumps(...) counts one serialization call", () => {
+  const f = py("def to_wire(self):\n    return json.dumps(self.payload)\n", "to_wire");
+  assert.equal(f.nSerializationCalls, 1);
+});
+
+test("serialization: self.model_dump() counts one serialization call", () => {
+  const f = py("def out(self):\n    return self.model_dump()\n", "out");
+  assert.equal(f.nSerializationCalls, 1);
+});
+
+test("serialization: nested log.info(json.dumps(x)) — ser=2, observ folds into plumbing", () => {
+  // Oracle: outer call head contains json/dumps (+1 ser) AND info/log (+1 observ);
+  // inner json.dumps(...) (+1 ser). So nSerializationCalls = 2.
+  const f = py("def emit(self):\n    log.info(json.dumps(self.x))\n", "emit");
+  assert.equal(f.nSerializationCalls, 2);
+  // observ call present → plumbing signal ≥ 1.
+  assert.ok(f.nPlumbingSignals >= 1);
+});
+
+test("serialization: a non-serializer call counts zero", () => {
+  const f = py("def go(self):\n    self.do_work(self.x)\n", "go");
+  assert.equal(f.nSerializationCalls, 0);
+});
+
+// ── domain conditionals vs guards ───────────────────────────────────────────
+
+test("domain conditional: value comparison is a domain signal", () => {
+  const f = py(
+    "def check(self, amount):\n    if amount > self.limit:\n        return True\n",
+    "check",
+  );
+  assert.equal(f.nDomainSignals, 1);
+});
+
+test("guard: `if x is None` is NOT a domain conditional", () => {
+  const f = py("def check(self, x):\n    if x is None:\n        return 0\n    return x\n", "check");
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("guard: `if isinstance(x, int)` is NOT a domain conditional", () => {
+  const f = py("def f(self, x):\n    if isinstance(x, int):\n        pass\n", "f");
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("guard: `if len(x) > 0` is NOT a domain conditional (len is a guard token)", () => {
+  const f = py("def f(self, x):\n    if len(x) > 0:\n        pass\n", "f");
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("conditional: elif does NOT add a second conditional (elif is a separate node)", () => {
+  const f = py(
+    "def f(self, x):\n    if x > 5:\n        pass\n    elif x < 2:\n        pass\n",
+    "f",
+  );
+  assert.equal(f.nDomainSignals, 1);
+});
+
+test("conditional: two separate if statements count two", () => {
+  const f = py("def f(self, x):\n    if x > 5:\n        pass\n    if x < 2:\n        pass\n", "f");
+  assert.equal(f.nDomainSignals, 2);
+});
+
+// ── arithmetic ───────────────────────────────────────────────────────────────
+
+test("arithmetic: `a + b` is one domain signal", () => {
+  const f = py("def f(self, a, b):\n    return a + b\n", "f");
+  assert.equal(f.nDomainSignals, 1);
+});
+
+test("arithmetic: `a + b * 2` is two domain signals (two binary ops)", () => {
+  const f = py("def total(self, a, b):\n    return a + b * 2\n", "total");
+  assert.equal(f.nDomainSignals, 2);
+});
+
+test("arithmetic: a bare comparison `a > b` is NOT arithmetic", () => {
+  const f = py("def f(self, a, b):\n    return a > b\n", "f");
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("arithmetic: augmented assignment `-=` is NOT counted as arithmetic", () => {
+  // Oracle: `self.balance_due -= amount` → n_arithmetic_ops = 0.
+  const f = py("def f(self):\n    self.balance_due -= amount\n", "pay");
+  assert.equal(f.nDomainSignals, 0);
+});
+
+// ── domain exceptions vs stdlib ─────────────────────────────────────────────
+
+test("domain exception: raising InsufficientFundsError is a domain signal", () => {
+  const f = py(
+    "def withdraw(self, amount):\n    if amount > self.balance:\n        raise InsufficientFundsError(amount)\n",
+    "withdraw",
+  );
+  // 1 conditional (amount > balance) + 1 domain exception = 2.
+  assert.equal(f.nDomainSignals, 2);
+});
+
+test("stdlib exception: raising ValueError is NOT a domain signal", () => {
+  const f = py("def parse(self, x):\n    raise ValueError('bad')\n", "do_parse");
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("domain exceptions: two distinct domain raises count two", () => {
+  const f = py(
+    "def f(self):\n    raise PaymentDeclinedError('x')\n    raise OrderConflict('y')\n",
+    "f",
+  );
+  assert.equal(f.nDomainSignals, 2);
+});
+
+// ── state transitions ───────────────────────────────────────────────────────
+
+test("state transition: assigning self.status is a domain signal", () => {
+  const f = py("def advance(self):\n    self.status = self.next_status\n", "advance", "Method");
+  assert.equal(f.nDomainSignals, 1);
+});
+
+test("state transition: assigning self.state (no RHS dot) is a domain signal", () => {
+  const f = py("def advance(self):\n    self.state = 5\n", "advance", "Method");
+  assert.equal(f.nDomainSignals, 1);
+});
+
+// ── qualified persistence / raw-SQL / bootstrap are NOT in nPlumbingSignals ──
+//
+// The shipped kernel reads ONLY nSerializationCalls / nDomainSignals /
+// nPlumbingSignals / isOrmModel, and the Python `n_plumbing_signals` (lines
+// 767-769) is composed EXACTLY as
+//   n_serialization_calls + n_observ_calls + (is_getter_setter?1:0)
+//                                          + (dto_mapper_ratio>=0.5?1:0).
+// Qualified-persistence / raw-SQL / bootstrap-name feed the Python's
+// `touches_persistence` / `is_framework_bootstrap` fields, which the kernel does
+// NOT consume — so they do NOT enter nPlumbingSignals. The expected values below
+// were captured by running the Python oracle `_extract_one` on each snippet.
+
+test("persistence: session.execute(...) is NOT in nPlumbingSignals (feeds touches_persistence)", () => {
+  // Oracle: ser=0 obs=0 gs=False dto=0 → n_plumbing_signals = 0.
+  const f = py("def save(self):\n    self.session.execute('foo')\n    db.commit()\n", "save");
+  assert.equal(f.nPlumbingSignals, 0);
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("persistence: a BARE verb `update(self.x)` is NOT a plumbing signal", () => {
+  const f = py("def thing(self):\n    update(self.x)\n", "thing");
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("persistence: `self.repo.get(ref)` does NOT enter nPlumbingSignals (qualified persistence is excluded)", () => {
+  // Oracle: ser=0 obs=0 gs=False (has a call) dto=0 → n_plumbing_signals = 0.
+  const f = py("def fetch(self, ref):\n    return self.repo.get(ref)\n", "fetch");
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("persistence: Flask web `session.get('_flashes')` is dict access, NOT persistence", () => {
+  const f = py(
+    "def flash(message):\n    flashes = session.get('_flashes', [])\n    return flashes\n",
+    "do_flash",
+  );
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("persistence: `context.update(...)` is NOT persistence (ctx is not a DB receiver)", () => {
+  const f = py(
+    "def update_template_context(self, context):\n    context.update(self.dispatch(name))\n    return context\n",
+    "update_template_context",
+  );
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+// ── raw SQL is NOT in nPlumbingSignals (feeds touches_persistence) ───────────
+
+test("raw SQL: SELECT ... FROM does NOT enter nPlumbingSignals", () => {
+  // Oracle: raw-SQL feeds touches_persistence, not n_plumbing_signals → 0.
+  const f = py("def q(self):\n    cur.execute('SELECT id FROM users WHERE x = 1')\n", "q");
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("raw SQL: INSERT INTO does NOT enter nPlumbingSignals", () => {
+  // Oracle: raw-SQL feeds touches_persistence, not n_plumbing_signals → 0.
+  const f = py('def do_add(self):\n    run("INSERT INTO user (name) VALUES (?)")\n', "do_add");
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+// ── observability ────────────────────────────────────────────────────────────
+
+test("observability: logger.info(...) is a plumbing signal", () => {
+  const f = py("def run(self):\n    logger.info('hi')\n", "run");
+  assert.ok(f.nPlumbingSignals >= 1);
+  assert.equal(f.nDomainSignals, 0);
+});
+
+// ── bootstrap name is NOT in nPlumbingSignals (feeds is_framework_bootstrap) ──
+
+test("bootstrap: create_app does NOT enter nPlumbingSignals (bootstrap is excluded)", () => {
+  // Oracle: ser=0 obs=0 gs=False (loc 4, has calls) dto=0 → n_plumbing_signals = 0.
+  const f = py(
+    "def create_app(config=None):\n    app = Flask(__name__)\n    app.config.update(config or {})\n    return app\n",
+    "create_app",
+  );
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("getter/setter: `to_wire` is a plumbing signal because it is a getter/setter", () => {
+  // Oracle: is_getter_setter == True (loc 2, no conditionals, <=1 return, 0 calls)
+  // → n_plumbing_signals = 1. (The old `wire` bootstrap path is NOT why it fires.)
+  const f = py("def to_wire(self):\n    return self.x\n", "to_wire");
+  assert.equal(f.nPlumbingSignals, 1);
+});
+
+test("getter/setter: a tiny pass-through `allocate` IS a getter/setter (loc<=3, no cond, <=1 return, 0 calls)", () => {
+  // Oracle: gs=True → n_plumbing_signals = 1. A plain bootstrap NAME never enters
+  // the formula, but this tiny pass-through trips the getter/setter tell.
+  const f = py("def allocate(self):\n    return self.x\n", "allocate");
+  assert.equal(f.nPlumbingSignals, 1);
+});
+
+test("bootstrap WITH domain residue: register_payment carries domain residue and no plumbing tell", () => {
+  // Oracle: n_domain_signals = 2 (the `amount > balance_due` conditional + the
+  // raised OverpaymentError). bootstrap-name does NOT enter n_plumbing_signals,
+  // and there is no serializer / observ / getter-setter / dto tell → plumb = 0.
+  const f = py(
+    "def register_payment(self, amount):\n    if amount > self.balance_due:\n        raise OverpaymentError(amount)\n    self.balance_due -= amount\n",
+    "register_payment",
+    "Method",
+  );
+  assert.equal(f.nDomainSignals, 2);
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+// ── ORM base-class match vs flask false positive ────────────────────────────
+
+test("ORM: class User(Base) is an ORM model (exact base superclass)", () => {
+  const f = py("pass\n", "User", "Class", "class User(Base):");
+  assert.equal(f.isOrmModel, true);
+});
+
+test("ORM: class Order(Model) is an ORM model", () => {
+  const f = py("pass\n", "Order", "Class", "class Order(Model):");
+  assert.equal(f.isOrmModel, true);
+});
+
+test("ORM false positive guard: class Request(RequestBase) is NOT an ORM model", () => {
+  // The precision fix: `Base` matches ONLY as an exact superclass identifier,
+  // never as a component of `RequestBase`.
+  const f = py("pass\n", "Request", "Class", "class Request(RequestBase):");
+  assert.equal(f.isOrmModel, false);
+});
+
+test("ORM: UserEntity (component role in the name) is an ORM model", () => {
+  const f = py("pass\n", "UserEntity", "Class", "class UserEntity:");
+  assert.equal(f.isOrmModel, true);
+});
+
+test("ORM: pydantic BaseModel base is dropped — class UserDTO(BaseModel) is NOT an ORM model", () => {
+  const f = py("pass\n", "UserDTO", "Class", "class UserDTO(BaseModel):");
+  assert.equal(f.isOrmModel, false);
+});
+
+test("ORM: AbstractRepository(abc.ABC) is infra plumbing, NOT an ORM model", () => {
+  // Repository is an infra ROLE component → is_orm_model is False (it is
+  // plumbing, but not a mapped entity).
+  const f = py(
+    "def add(self, p):\n    self._add(p)\n",
+    "AbstractRepository",
+    "Class",
+    "class AbstractRepository(abc.ABC):",
+  );
+  assert.equal(f.isOrmModel, false);
+});
+
+// ── Java / Go class-head + persistence parity ───────────────────────────────
+
+test("java ORM: @Entity class Owner extends BaseEntity is an ORM model", () => {
+  const f = computePlumbingFeatures({
+    symbolName: "Owner",
+    kind: "Class",
+    bodyText: "",
+    classHeadText: "@Entity\nclass Owner extends BaseEntity",
+    lang: "java",
+  });
+  assert.equal(f.isOrmModel, true);
+});
+
+test("java infra: interface OwnerRepository extends JpaRepository is NOT an ORM model", () => {
+  const f = computePlumbingFeatures({
+    symbolName: "OwnerRepository",
+    kind: "Class",
+    bodyText: "",
+    classHeadText: "interface OwnerRepository extends JpaRepository<Owner, Integer>",
+    lang: "java",
+  });
+  assert.equal(f.isOrmModel, false);
+});
+
+test("java persistence: em.persist(entity) does NOT enter nPlumbingSignals", () => {
+  // Oracle: qualified persistence feeds touches_persistence, not the kernel's
+  // n_plumbing_signals (ser=0 obs=0 gs=False dto=0) → 0.
+  const f = computePlumbingFeatures({
+    symbolName: "save",
+    kind: "Method",
+    bodyText: "void save() {\n    em.persist(entity);\n}",
+    lang: "java",
+  });
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("go persistence: uc.repo.Store(ctx, &task) does NOT enter nPlumbingSignals", () => {
+  // Oracle: qualified persistence feeds touches_persistence, not the kernel's
+  // n_plumbing_signals (ser=0 obs=0 gs=False dto=0) → 0.
+  const f = computePlumbingFeatures({
+    symbolName: "Create",
+    kind: "Method",
+    bodyText:
+      "func (uc *UseCase) Create(ctx context.Context) error {\n    err := uc.repo.Store(ctx, &task)\n    return err\n}",
+    lang: "go",
+  });
+  assert.equal(f.nPlumbingSignals, 0);
+});
+
+test("go: no raise/throw, so a domain-exception scan is zero", () => {
+  const f = computePlumbingFeatures({
+    symbolName: "Run",
+    kind: "Function",
+    bodyText: "func Run() {\n    panic(MyDomainError{})\n}",
+    lang: "go",
+  });
+  // Go has no raise node — domain exceptions are 0 regardless of the name.
+  assert.equal(f.nDomainSignals, 0);
+});
+
+// ── end-to-end kernel agreement spot-check ──────────────────────────────────
+
+test("kernel agreement: a pure serializer is swept (ser>0, domain=0)", () => {
+  const f = py("def to_wire(self):\n    return json.dumps(self.payload)\n", "marshal_out");
+  assert.equal(f.nSerializationCalls, 1);
+  assert.equal(f.nDomainSignals, 0);
+});
+
+test("kernel agreement: a domain method is NOT swept (domain>0)", () => {
+  const f = py(
+    "def allocate(self, line):\n    if self.can_allocate(line):\n        return line.qty * self.unit_price\n",
+    "allocate",
+    "Method",
+  );
+  assert.ok(f.nDomainSignals > 0);
+});
diff --git a/packages/ingestion/src/extract/business-logic-features.ts b/packages/ingestion/src/extract/business-logic-features.ts
new file mode 100644
index 00000000..172698ca
--- /dev/null
+++ b/packages/ingestion/src/extract/business-logic-features.ts
@@ -0,0 +1,1440 @@
+/**
+ * Business-logic / plumbing FEATURE EXTRACTOR — the companion producer for the
+ * already-shipped `classifyPlumbing` sieve in `@opencodehub/analysis`
+ * (`packages/analysis/src/business-logic.ts`).
+ *
+ * This is a faithful TypeScript port of the deterministic feature-derivation
+ * half of `och_bizlogic_extract.py` (the "student substrate" extractor). The
+ * shipped sieve numbers — 0.936 plumbing precision, 0.925 business recall —
+ * depend on reproducing the Python marker logic EXACTLY, so this module copies
+ * the Python's marker sets, word-boundary matching, qualified-call gating, and
+ * camelCase component rules verbatim. It is a precision port, not a
+ * reimagining: where the Python uses a word-boundary / component / exact match,
+ * this module uses the SAME shape and never falls back to loose substring
+ * matching (the substring path is what produced flask's 110 false-persistence
+ * hits; the precise path cut it to 9).
+ *
+ * ## Interface contract
+ *
+ * The kernel consumes four fields. This extractor computes exactly those four
+ * from the symbol's source text:
+ *
+ *   - `nSerializationCalls` — count of calls whose method/head word matches
+ *     {@link SERIALIZATION_MARKERS} (word-boundary, per call).
+ *   - `nDomainSignals` — POSITIVE domain residue: domain conditionals
+ *     (non-guard `if`) + arithmetic operators + raised domain exceptions +
+ *     state-machine transitions. Mirrors the Python iter-4
+ *     `n_domain_signals = n_domain_conditionals + n_arithmetic_ops +
+ *     n_domain_exceptions + n_state_transitions`.
+ *   - `nPlumbingSignals` — NEGATIVE plumbing tells, composed EXACTLY as the
+ *     Python `n_plumbing_signals` (och_bizlogic_extract.py lines 767-769):
+ *     `n_serialization_calls + n_observ_calls + (is_getter_setter ? 1 : 0) +
+ *     (dto_mapper_ratio >= 0.5 ? 1 : 0)`. It deliberately does NOT include
+ *     qualified-persistence, raw-SQL, or bootstrap — those feed the Python's
+ *     `touches_persistence` / `is_framework_bootstrap` fields, which the shipped
+ *     kernel does NOT read.
+ *   - `isOrmModel` — class-head ORM-base match (exact base superclass OR
+ *     camelCase component role).
+ *
+ * ## Text vs. tree
+ *
+ * The Python mixes tree-walking (if / raise / binary-op / assignment / call
+ * nodes) with text-and-regex markers applied to each node's text. This port
+ * receives the symbol's body as TEXT (`bodyText`) — there is no parser handle
+ * in the signature, and the function is pure/sync. So the tree-structural
+ * features (conditionals, arithmetic, exceptions, transitions, call
+ * enumeration) are reproduced with line/token scanning that matches the
+ * Python's per-node matchers. The marker / qualified-call / raw-SQL / camelCase
+ * helpers are ported VERBATIM from the Python regexes because they are already
+ * text-and-regex in the source. See the divergence notes inline.
+ *
+ * Determinism: pure function of its arguments, no I/O, no randomness, no shared
+ * mutable state. Safe to call at parse time alongside `cyclomaticComplexity`.
+ */
+
+export interface ComputePlumbingFeaturesArgs {
+  readonly symbolName: string;
+  /** "Function" | "Method" | "Class" | ... — only "Class" (any case) takes the class-head path. */
+  readonly kind: string;
+  /** Source text of the symbol's body/subtree. */
+  readonly bodyText: string;
+  /** For Class kinds: the head line(s) up to the first `{` or `:` (the base list). */
+  readonly classHeadText?: string;
+  readonly lang: "python" | "java" | "go";
+}
+
+export interface PlumbingFeatureCounts {
+  readonly nSerializationCalls: number;
+  /** Positive domain signals: domain conditionals + arithmetic + domain exceptions + state transitions. */
+  readonly nDomainSignals: number;
+  /**
+   * Negative plumbing signals, composed EXACTLY as Python `n_plumbing_signals`:
+   * `n_serialization_calls + n_observ_calls + (is_getter_setter ? 1 : 0) +
+   * (dto_mapper_ratio >= 0.5 ? 1 : 0)`.
+   */
+  readonly nPlumbingSignals: number;
+  readonly isOrmModel: boolean;
+}
+
+// ── marker sets (ported verbatim from och_bizlogic_extract.py) ──────────────
+
+/** Serialization markers (plumbing). */
+const SERIALIZATION_MARKERS: ReadonlySet<string> = new Set([
+  "dumps",
+  "loads",
+  "model_dump",
+  "dict",
+  "to_dict",
+  "from_dict",
+  "json",
+  "serialize",
+  "deserialize",
+  "Marshal",
+  "Unmarshal",
+  "parse",
+  "stringify",
+  "to_json",
+  "from_json",
+  "asdict",
+  "schema",
+  "encode",
+  "decode",
+  "ObjectMapper",
+  "writeValue",
+  "readValue",
+]);
+
+/** Logging / metrics / tracing markers (plumbing). */
+const OBSERV_MARKERS: ReadonlySet<string> = new Set([
+  "log",
+  "logger",
+  "logging",
+  "debug",
+  "info",
+  "warning",
+  "warn",
+  "error",
+  "exception",
+  "metric",
+  "counter",
+  "gauge",
+  "histogram",
+  "span",
+  "trace",
+  "emit",
+  "record",
+  "telemetry",
+  "println",
+  "printf",
+  "Print",
+  "Printf",
+  "Println",
+  "Sprintf",
+]);
+
+// ── ORM / persistence base classes (class-head matching only) ───────────────
+
+/**
+ * ORM declarative base — matched ONLY as an EXACT superclass identifier in the
+ * base list. Never a component/substring, so RequestBase / ResponseBase /
+ * BaseLoader do NOT count.
+ */
+const ORM_BASE_EXACT: ReadonlySet<string> = new Set([
+  "Base",
+  "declarative_base",
+  "Model",
+  "SQLModel",
+  "TortoiseModel",
+]);
+
+/** Unambiguous ORM-model role words — safe to match as a camelCase component. */
+const ORM_BASE_COMPONENT: readonly string[] = [
+  "Entity",
+  "Document",
+  "Table",
+  "AbstractEntity",
+  "AbstractPersistable",
+  "PanacheEntity",
+];
+
+// NOTE: the Python's INFRA_ROLE_COMPONENT (Repository/UnitOfWork/DAO/Mapper/…)
+// is intentionally NOT ported here. It only feeds the Python's
+// `touches_persistence` flag and forces `is_orm_model = False` — neither of
+// which is one of the four fields the shipped kernel reads (nSerializationCalls,
+// nDomainSignals, nPlumbingSignals, isOrmModel-true). An infra role never makes
+// a class an ORM model, so dropping it cannot change any kernel input.
+
+// ── guard / exception / state tokens ────────────────────────────────────────
+
+/** None/type guard predicates → NOT domain conditionals. */
+const GUARD_TOKENS: readonly string[] = [
+  "None",
+  "null",
+  "nil",
+  "isinstance",
+  "hasattr",
+  "getattr",
+  "type",
+  "instanceof",
+  "typeof",
+  "is None",
+  "is not None",
+  "== nil",
+  "!= nil",
+  "undefined",
+  "len",
+  "empty",
+  "isEmpty",
+  "== null",
+  "!= null",
+];
+
+/** Domain-exception heuristic: a raised class whose name ends in these. */
+const DOMAIN_EXC_SUFFIXES: readonly string[] = [
+  "Error",
+  "Exception",
+  "Invalid",
+  "Denied",
+  "NotAllowed",
+  "Violation",
+  "Conflict",
+  "Forbidden",
+  "Unauthorized",
+];
+
+/** Stdlib / framework errors that are plumbing, not domain. */
+const STDLIB_EXC: ReadonlySet<string> = new Set([
+  "ValueError",
+  "TypeError",
+  "KeyError",
+  "IndexError",
+  "AttributeError",
+  "RuntimeError",
+  "NotImplementedError",
+  "StopIteration",
+  "OSError",
+  "IOError",
+  "Exception",
+  "BaseException",
+  "Error",
+  "AssertionError",
+  "ImportError",
+  "FileNotFoundError",
+  "NullPointerException",
+  "IllegalArgumentException",
+  "IllegalStateException",
+  "RuntimeException",
+]);
+
+/** Status/state field names → state-machine transition signal. */
+const STATE_FIELD_TOKENS: readonly string[] = [
+  "status",
+  "state",
+  "phase",
+  "stage",
+  "step",
+  "mode",
+  "kind",
+  "level",
+  "tier",
+  "verdict",
+];
+
+// ── ported regex primitives (verbatim from the Python) ──────────────────────
+
+const IDENT_RE = /[A-Za-z_][A-Za-z0-9_]*/g;
+/** camelCase / PascalCase component splitter (Python `_CAMEL_RE`). */
+const CAMEL_RE = /[A-Z]+(?![a-z])|[A-Z][a-z0-9]*|[a-z0-9]+/g;
+
+/** Backslash-escape every RegExp metacharacter in `s` so it can sit inside `\b...\b`. */
+function escapeRegExp(s: string): string {
+  return s.replace(/[\\^$.*+?()[\]{}|]/g, (meta) => `\\${meta}`);
+}
+
+const WORD_RE_CACHE = new Map<string, RegExp>();
+/** Case-insensitive `\b<marker>\b` pattern for `marker`; compiled once, then memoised. */
+function wordRe(marker: string): RegExp {
+  const cached = WORD_RE_CACHE.get(marker);
+  if (cached !== undefined) return cached;
+  const compiled = new RegExp(`\\b${escapeRegExp(marker)}\\b`, "i");
+  WORD_RE_CACHE.set(marker, compiled);
+  return compiled;
+}
+
+/**
+ * Whole-word marker test. Reports whether at least one entry of `markers`
+ * occurs in `text` delimited by word boundaries — a marker embedded inside a
+ * longer word does not count. Boolean counterpart of Python `marker_hit`: the
+ * Python returns the matched marker, but its callers only test truthiness.
+ * Matching is case-insensitive because `wordRe` compiles with the `i` flag.
+ */
+function markerHit(text: string, markers: ReadonlySet<string>): boolean {
+  // `some` stops at the first hit, visiting markers in Set insertion order.
+  const candidates = [...markers];
+  return candidates.some((marker) => wordRe(marker).test(text));
+}
+
+/** Distinct camelCase pieces of `ident`, lower-cased (port of Python `_components`). */
+function components(ident: string): Set<string> {
+  // Collect every CAMEL_RE hit, lower-case it, and let the Set drop repeats.
+  const pieces = Array.from(ident.matchAll(CAMEL_RE), (hit) => hit[0].toLowerCase());
+  return new Set(pieces);
+}
+
+/** How many camelCase pieces `ident` splits into (Python `len(_CAMEL_RE.findall(b))`). */
+function camelPartCount(ident: string): number {
+  // A no-match `match` result is null, which counts as zero pieces.
+  return ident.match(CAMEL_RE)?.length ?? 0;
+}
+
+/** Every identifier token in `text`, in order of appearance (Python `_IDENT_RE.findall`). */
+function findIdentifiers(text: string): string[] {
+  return Array.from(text.matchAll(IDENT_RE), (hit) => hit[0]);
+}
+
+// NOTE: the Python `persistence_call_hit` / `raw_sql_hit` / `is_bootstrap_name`
+// helpers and their marker sets (PERSIST_*, RAW_SQL_PATTERNS, BOOTSTRAP_*,
+// QUALIFIED_CALL_RE, receiverTokens, AMBIGUOUS/DICT/CONTEXT/STRONG_RECEIVERS)
+// are intentionally NOT ported. They feed the Python's `touches_persistence`
+// and `is_framework_bootstrap` fields, neither of which is one of the four
+// fields the shipped kernel reads (nSerializationCalls, nDomainSignals,
+// nPlumbingSignals, isOrmModel). n_plumbing_signals is composed ONLY from
+// serialization + observability + getter/setter + dto-mapper-ratio (Python
+// lines 767-769), so qualified-persistence / raw-SQL / bootstrap never enter it.
+
+// ── guard / class-head (verbatim ports) ─────────────────────────────────────
+
+/**
+ * Port of Python `is_guard_condition`: true when `condText` contains any guard
+ * token. Single-identifier tokens must match as whole words (case-insensitively,
+ * via `wordRe`); multi-word or operator tokens such as `== nil` match as plain
+ * substrings.
+ */
+function isGuardCondition(condText: string): boolean {
+  return GUARD_TOKENS.some((token) =>
+    isIdentifier(token) ? wordRe(token).test(condText) : condText.includes(token),
+  );
+}
+
+/** True when `s` is one ASCII identifier: a letter or `_`, then letters, digits or `_`. */
+function isIdentifier(s: string): boolean {
+  return /^[A-Za-z_]\w*$/.test(s);
+}
+
+/**
+ * ORM-model verdict from a class head alone — the `is_orm_model` half of Python
+ * `class_head_persistence`. Returns true in exactly two situations:
+ *
+ *   1. some base identifier equals an ORM_BASE_EXACT entry verbatim, or
+ *   2. an ORM_BASE_COMPONENT role word is present: a single-word role as a
+ *      camelCase piece of the name, a base, or an annotation identifier; a
+ *      compound role as a case-insensitive substring of the name or a base.
+ *
+ * Infra repo/DAO role words are not consulted here at all.
+ *
+ * @param className   the class's own declared name (e.g. `AbstractRepository`).
+ * @param baseIdents  identifiers in the superclass / implements list.
+ * @param annotBlob   decorator/annotation text (e.g. `@Entity @Table(...)`).
+ */
+function classHeadIsOrmModel(
+  className: string,
+  baseIdents: readonly string[],
+  annotBlob: string,
+): boolean {
+  // Case 1 — exact declarative base: whole-identifier equality only, so
+  // `RequestBase` never passes for `Base`.
+  if (baseIdents.some((base) => ORM_BASE_EXACT.has(base))) return true;
+
+  // Pool the lower-cased camelCase pieces of the name, the bases and every
+  // identifier in the annotation text; single-word roles are looked up here.
+  const annotIdents = findIdentifiers(annotBlob);
+  const pieces = new Set<string>();
+  for (const ident of [className, ...baseIdents, ...annotIdents]) {
+    for (const piece of components(ident)) pieces.add(piece);
+  }
+
+  // Compound roles are searched as substrings of the name and bases only
+  // (annotations are not consulted for them).
+  const loweredIdents = [className, ...baseIdents].map((ident) => ident.toLowerCase());
+
+  // Case 2 — first role word that matches decides; no match means not ORM.
+  return ORM_BASE_COMPONENT.some((role) => {
+    const needle = role.toLowerCase();
+    if (camelPartCount(role) === 1) return pieces.has(needle);
+    return loweredIdents.some((ident) => ident.includes(needle));
+  });
+}
+
+// ── class-head base-list extraction ─────────────────────────────────────────
+
+/**
+ * Pull `{ className, baseIdents, annotBlob }` out of the class-head text.
+ * Mirrors the `class_name_and_bases` Python helper for the languages the sieve
+ * is validated on (python / java / go). The head text is everything up to the
+ * first `{` or `:` (Python) — the caller supplies it as `headText`.
+ *
+ *   python : `class User(Base, Mixin)`  → bases = idents inside the parens
+ *   java   : `class Owner extends BaseEntity implements X` → bases after
+ *            extends/implements
+ *   go     : `type User struct` (no inheritance) → no bases
+ *
+ * The pydantic `BaseModel` base is dropped (a DTO base, NOT an ORM model),
+ * matching `base_idents = [b for b in base_idents if b != "BaseModel"]`.
+ */
+function classNameAndBases(
+  headText: string,
+  lang: "python" | "java" | "go",
+): { className: string; baseIdents: string[]; annotBlob: string } {
+  // Lines whose trimmed text starts with `@` (`@Entity`, `@dataclass`) are
+  // annotations: they feed the annotation blob, never the name/base parse.
+  const lines = headText.split("\n");
+  const annotLines: string[] = [];
+  const declLines: string[] = [];
+  for (const line of lines) {
+    if (line.trim().startsWith("@")) annotLines.push(line.trim());
+    else declLines.push(line);
+  }
+  const decl = declLines.join("\n");
+  const annotBlob = annotLines.join(" ");
+
+  let className = "";
+  let baseIdents: string[] = [];
+
+  if (lang === "python") {
+    // class Name(Base1, Base2):
+    const nameMatch = decl.match(/class\s+([A-Za-z_][A-Za-z0-9_]*)/);
+    className = nameMatch?.[1] ?? "";
+    const parenMatch = decl.match(/class\s+[A-Za-z_][A-Za-z0-9_]*\s*\(([\s\S]*?)\)/);
+    if (parenMatch?.[1] !== undefined) {
+      baseIdents = findIdentifiers(parenMatch[1]);
+    }
+  } else if (lang === "java") {
+    const nameMatch = decl.match(/(?:class|interface|record|enum)\s+([A-Za-z_$][A-Za-z0-9_$]*)/);
+    className = nameMatch?.[1] ?? "";
+    // Idents after `extends`/`implements`. NOTE(review): no generic stripping here — confirm.
+    const extMatch = decl.match(/\bextends\b([\s\S]*?)(?:\bimplements\b|$)/);
+    const implMatch = decl.match(/\bimplements\b([\s\S]*)$/);
+    const baseSrc = `${extMatch?.[1] ?? ""} ${implMatch?.[1] ?? ""}`;
+    baseIdents = findIdentifiers(baseSrc);
+  } else {
+    // go: `type Name struct {` / `type Name interface {` — no inheritance.
+    const nameMatch = decl.match(/type\s+([A-Za-z_][A-Za-z0-9_]*)/);
+    className = nameMatch?.[1] ?? "";
+    baseIdents = [];
+  }
+
+  // Drop empty entries, the class's own name if it leaked into the base list,
+  // and the pydantic DTO base. Mirrors the two Python filters.
+  baseIdents = baseIdents.filter((b) => b.length > 0 && b !== className && b !== "BaseModel");
+  return { className, baseIdents, annotBlob };
+}
+
+// ── body call enumeration (text reproduction of per-call walk) ──────────────
+
+/**
+ * A call site recovered from `bodyText`: the call's "head text" (callee start →
+ * end of that physical line, capped at 200 chars) plus the same slice uncapped.
+ * This mirrors the Python per-call
+ * `head_text = node_text(call).split("\n", 1)[0][:200]`, where the tree-sitter
+ * call node text begins at the (possibly chained) callee. Each `(` preceded by
+ * an identifier / member chain is one call site, so nested calls on the same
+ * line are enumerated separately — matching the Python `walk`, which visits a
+ * nested `json.dumps(...)` inside `log.info(...)` as its own call node.
+ */
+interface CallSite {
+  /** Head text from the callee start to end-of-line, capped at 200 chars. */
+  readonly headText: string;
+  /** Full call text from the callee start to end-of-line (uncapped). For raw-SQL. */
+  readonly fullText: string;
+}
+
+/** Call opener: optional `a.b.` member chain, callee name (group 1), `(`. Stateful `g` regex. */
+const CALL_OPENER_RE = /(?:[A-Za-z_$][\w$]*\s*\.\s*)*([A-Za-z_$][\w$]*)\s*\(/g;
+
+/**
+ * Control-flow / declaration keywords that take a `(` but are NOT call nodes in
+ * the AST (`if (...)`, `for (...)`, `func (...)`, …). Excluding them keeps the
+ * text call-scan from inflating `n_calls` — which would wrongly disqualify a
+ * getter/setter (the gate requires `n_calls == 0`). One union set serves python /
+ * java / go. NOTE(review): a real callee so named (`range(n)`) is dropped too.
+ */
+const NON_CALL_KEYWORDS: ReadonlySet<string> = new Set([
+  "if",
+  "for",
+  "while",
+  "switch",
+  "catch",
+  "return",
+  "func",
+  "def",
+  "class",
+  "with",
+  "else",
+  "elif",
+  "case",
+  "select",
+  "defer",
+  "go",
+  "range",
+  "await",
+  "yield",
+  "in",
+  "and",
+  "or",
+  "not",
+  "new",
+]);
+
+/**
+ * Blank the leading definition header (`def f(...):` / `func (r *R) F(...) {` /
+ * `void f(...) {`) so the function's OWN name+params is not enumerated as a
+ * call and its signature `*`/`(` are not scanned as body code. The Python's AST
+ * counts call/binary/if nodes only inside the body; the signature is a separate
+ * parameter/type subtree. Every char from offset 0 through the terminator turns
+ * into a space (newlines kept), so the result has the input's length and lines.
+ */
+function stripDefinitionHeader(bodyText: string, lang: "python" | "java" | "go"): string {
+  const chars = bodyText.split("");
+  const n = chars.length;
+  // Mask strings/comments while LOCATING the header terminator so a `:`/`{`/`;`
+  // inside a string or comment cannot false-terminate the header span.
+  const probe = maskStringsAndComments(bodyText);
+  // Skip leading decorator / annotation lines (`@Transactional(...)`,
+  // `@dataclass`): each `@` line is skipped to its newline, so the terminator is
+  // sought on the DECLARATION line. NOTE(review): assumes one annotation per line.
+  let scanFrom = 0;
+  for (;;) {
+    let j = scanFrom;
+    while (j < n && (probe[j] === " " || probe[j] === "\t" || probe[j] === "\n")) j++;
+    if (probe[j] === "@") {
+      let k = j;
+      while (k < n && probe[k] !== "\n") k++;
+      scanFrom = k;
+    } else {
+      scanFrom = j;
+      break;
+    }
+  }
+  // The header ends at the FIRST depth-0 terminator from the declaration start:
+  //   - python: `:` (the suite opener). The param list `(...)` and any return
+  //     annotation sit at bracket-depth > 0 or after a `)`, so a parameter
+  //     type-hint `:` is depth-protected. `class X:` terminates immediately.
+  //   - java/go: `{` (block opener) or `;` (Java abstract method / Go signature
+  //     with no block). Composite-literal `{` would be at depth > 0.
+  // Scanning from the start (rather than from a param paren) is what keeps a
+  // parenthesis-free class header `class AppContext:` from over-stripping into
+  // the class body. If no depth-0 terminator is found, nothing is stripped.
+  const terminators: ReadonlySet<string> = lang === "python" ? new Set([":"]) : new Set(["{", ";"]);
+  let depth = 0;
+  let end = -1;
+  for (let i = scanFrom; i < n; i++) {
+    const c = probe[i] ?? "";
+    if (c === "(" || c === "[" || c === "{") {
+      // an opening brace IS a java/go header terminator at depth 0
+      if (depth === 0 && terminators.has(c)) {
+        end = i;
+        break;
+      }
+      depth += 1;
+    } else if (c === ")" || c === "]" || c === "}") {
+      if (depth > 0) depth -= 1;
+    } else if (depth === 0 && terminators.has(c)) {
+      end = i;
+      break;
+    }
+  }
+  if (end === -1) return bodyText;
+  for (let i = 0; i <= end && i < n; i++) {
+    if (chars[i] !== "\n") chars[i] = " ";
+  }
+  return chars.join("");
+}
+
+/** Keywords that, directly before a `name(`, make it a function/method
+ *  DEFINITION rather than a call: `def name(` / `func name(` / `fn name(`. The
+ *  Python AST counts `function_definition` separately from `call`, so a nested
+ *  method def inside a class body must NOT be a call. */
+const DEF_KEYWORDS: ReadonlySet<string> = new Set(["def", "func", "fn"]);
+
+/**
+ * Enumerate call sites. Call OPENERS are located on `maskedBody` (strings and
+ * comments masked) so an `ident (` run inside a docstring/comment never becomes
+ * a call — the Python only ever sees real `call` AST nodes. The per-call HEAD
+ * TEXT is cut from `rawBody` (length-aligned with `maskedBody`) because the
+ * Python's `marker_hit` reads the call NODE text INCLUDING its string arguments:
+ * a Go `t.Fatal("… init error …")` is an observability call BECAUSE `error`
+ * appears in the string arg, so the head must be raw.
+ */
+function enumerateCallSites(maskedBody: string, rawBody: string): CallSite[] {
+  const found: CallSite[] = [];
+  CALL_OPENER_RE.lastIndex = 0; // shared global regex: always rescan from the top
+  for (
+    let hit = CALL_OPENER_RE.exec(maskedBody);
+    hit !== null;
+    hit = CALL_OPENER_RE.exec(maskedBody)
+  ) {
+    const calleeStart = hit.index;
+    const calleeName = hit[1] ?? "";
+    if (NON_CALL_KEYWORDS.has(calleeName)) continue;
+    if (precededByDefKeyword(maskedBody, calleeStart)) continue;
+    const newlineAt = rawBody.indexOf("\n", calleeStart);
+    const wholeLine = rawBody.slice(calleeStart, newlineAt === -1 ? rawBody.length : newlineAt);
+    found.push({ headText: wholeLine.slice(0, 200), fullText: wholeLine });
+  }
+  return found;
+}
+
+/** True when the identifier chain starting at `start` is NOT a call site:
+ *    - the char just before it is `@` (a decorator/annotation invocation
+ *      `@Name(...)`), OR
+ *    - the word just before it, across spaces/tabs, is a definition keyword
+ *      (`def`/`func`/`fn`), which makes the chain a definition name.
+ *  Both are distinct AST nodes from `call`, so they must not be counted. */
+function precededByDefKeyword(text: string, start: number): boolean {
+  // `@Name(` — a decorator / annotation invocation.
+  if (text.charAt(start - 1) === "@") return true;
+  // Otherwise walk left over blanks, then over the word that precedes them.
+  let wordEnd = start;
+  while (wordEnd > 0 && (text[wordEnd - 1] === " " || text[wordEnd - 1] === "\t")) wordEnd--;
+  let wordStart = wordEnd;
+  while (wordStart > 0 && /[A-Za-z_$]/.test(text[wordStart - 1] ?? "")) wordStart--;
+  return DEF_KEYWORDS.has(text.slice(wordStart, wordEnd));
+}
+
+// ── positive domain-signal scanners (text reproduction of the AST walk) ─────
+
+/**
+ * Mask string literals and comments so the STRUCTURAL scans (conditionals,
+ * arithmetic, exceptions) do not read an operator / `if` / `raise` keyword that
+ * lives inside a string or comment as code — the Python walks AST nodes, so a
+ * `+` inside `'a + b'` or an `if` inside a comment never appears as a structural
+ * node. Two distinct replacements, both length-preserving (so line offsets and
+ * the assignment line-scan stay aligned):
+ *
+ *   - comments → spaces (no operand left behind).
+ *   - string literals → a single operand SENTINEL `0` at the opening-quote
+ *     position, the remaining chars (including the closing quote) blanked.
+ *
+ * The sentinel is the key fidelity point: the Python AST treats a string literal
+ * as an OPERAND, so `name + "_dup"` is a `binary_expression` and counts as
+ * arithmetic. Blanking the whole literal to spaces would erase the right operand
+ * and miss the `+`; leaving the sentinel `0` keeps the operand boundary so the
+ * `+` is still recognised. Likewise `x == "active"` keeps its right operand.
+ *
+ * Handles Python/JS single & double quotes, backticks, triple quotes, and
+ * `#` / `//` line comments plus block comments.
+ *
+ * NOTE(review): comment openers are language-agnostic — `#` and `//` both start
+ * a line comment here, so a Python floor-division `a // b` loses the rest of
+ * its line and a `#` in Java/Go source would too. Callers pick per scan whether
+ * to read this masked text or the raw text (e.g. call head text stays raw).
+ */
+function maskStringsAndComments(src: string): string {
+  const out = src.split("");
+  let i = 0;
+  const n = src.length;
+  const blank = (from: number, to: number) => {
+    for (let k = from; k < to && k < n; k++) {
+      if (out[k] !== "\n") out[k] = " ";
+    }
+  };
+  // Replace a string literal [from, to) with a single operand sentinel at `from`
+  // and spaces elsewhere (newlines preserved for line alignment).
+  const sentinelize = (from: number, to: number) => {
+    blank(from, to);
+    if (from < n && out[from] !== "\n") out[from] = "0";
+  };
+  while (i < n) {
+    const c = src[i] ?? "";
+    const c2 = src.slice(i, i + 2);
+    const c3 = src.slice(i, i + 3);
+    // line comments
+    if (c === "#" || c2 === "//") {
+      let j = i;
+      while (j < n && src[j] !== "\n") j++;
+      blank(i, j);
+      i = j;
+      continue;
+    }
+    // block comment
+    if (c2 === "/*") {
+      let j = i + 2;
+      while (j < n && src.slice(j, j + 2) !== "*/") j++;
+      const end = Math.min(n, j + 2);
+      blank(i, end);
+      i = end;
+      continue;
+    }
+    // triple-quoted strings
+    if (c3 === '"""' || c3 === "'''") {
+      const q = c3;
+      let j = i + 3;
+      while (j < n && src.slice(j, j + 3) !== q) j++;
+      const end = Math.min(n, j + 3);
+      sentinelize(i, end);
+      i = end;
+      continue;
+    }
+    // single/double/backtick strings (single line for ' and ", possibly multi
+    // for `; we stop at the matching unescaped quote or newline for '/")
+    if (c === '"' || c === "'" || c === "`") {
+      const q = c;
+      let j = i + 1;
+      while (j < n) {
+        const cj = src[j] ?? "";
+        if (cj === "\\") {
+          j += 2;
+          continue;
+        }
+        if (cj === q) break;
+        if ((q === '"' || q === "'") && cj === "\n") break;
+        j++;
+      }
+      const end = Math.min(n, j + 1);
+      // Python f-string (`f"...{expr}..."`, `rf"…"`): the `{expr}` interpolations
+      // are REAL code (the AST parses calls/operators inside them, e.g.
+      // `f"<{type(self).__name__}>"` is a call node). Preserve `{...}` spans and
+      // mask only the literal text. A non-f string is sentinelized whole.
+      const prefix = stringPrefix(src, i);
+      if (prefix.includes("f") && (q === '"' || q === "'")) {
+        sentinelizeFString(out, src, i, end);
+      } else {
+        sentinelize(i, end);
+      }
+      i = end;
+      continue;
+    }
+    i++;
+  }
+  return out.join("");
+}
+
+/** Lower-cased string-prefix letters immediately before the opening quote at
+ *  `quoteIdx` (`f`, `r`, `b`, `rf`, `fr`, …); used to detect f-strings. */
+function stringPrefix(src: string, quoteIdx: number): string {
+  // Walk left from the quote across the unbroken run of ASCII letters.
+  let runStart = quoteIdx;
+  while (runStart > 0 && /[A-Za-z]/.test(src[runStart - 1] ?? "")) {
+    runStart--;
+  }
+  const letters = src.slice(runStart, quoteIdx).toLowerCase();
+  // A genuine prefix has at most two letters; a longer run is an identifier
+  // (or keyword) that merely abuts the quote, so it yields no prefix.
+  return letters.length > 2 ? "" : letters;
+}
+
+/**
+ * Mask a Python f-string [from, to): blank the literal text but PRESERVE the
+ * `{expr}` interpolation contents (real code the AST parses) so embedded calls /
+ * operators are still scanned. `{{` / `}}` are escaped literal braces (blanked).
+ * Writes into `out` in place; length-preserving.
+ */
+function sentinelizeFString(out: string[], src: string, from: number, to: number): void {
+  const n = out.length;
+  const blankOne = (k: number) => {
+    if (k < n && out[k] !== "\n") out[k] = " ";
+  };
+  let sentinelPlaced = false;
+  let k = from;
+  while (k < to) {
+    const c = src[k] ?? "";
+    if (c === "{" && src[k + 1] === "{") {
+      blankOne(k);
+      blankOne(k + 1);
+      k += 2;
+      continue;
+    }
+    if (c === "}" && src[k + 1] === "}") {
+      blankOne(k);
+      blankOne(k + 1);
+      k += 2;
+      continue;
+    }
+    if (c === "{") {
+      // preserve the interpolation contents until the matching `}` (the `{`/`}`
+      // braces themselves become spaces; an inner `:` format-spec is left as-is,
+      // harmless for the scans). No sentinel is written on this path: it is
+      // placed once, on the first literal char blanked further down.
+      blankOne(k);
+      k += 1;
+      let depth = 1;
+      while (k < to && depth > 0) {
+        const cc = src[k] ?? "";
+        if (cc === "{") depth += 1;
+        else if (cc === "}") {
+          depth -= 1;
+          if (depth === 0) {
+            blankOne(k);
+            k += 1;
+            break;
+          }
+        }
+        // interpolation code char: left untouched (out[k] still holds it)
+        k += 1;
+      }
+      continue;
+    }
+    // literal char → blank, but keep a single operand sentinel for the whole
+    // string so `name + f"x"` style still has a right operand.
+    blankOne(k);
+    if (!sentinelPlaced && out[k] === " ") {
+      out[k] = "0";
+      sentinelPlaced = true;
+    }
+    k += 1;
+  }
+}
+
+/**
+ * Enumerate the condition text of every `if` statement in `bodyText`. Feeds
+ * `countDomainConditionals`, which counts the conditions that are not guards.
+ *
+ *   python : a line-leading `if` (so `elif` never matches); the condition runs
+ *            to the depth-0 `:` that opens the suite and may span many lines
+ *            (`if (\n   a\n   and b\n):`). A line-leading `if` that reaches an
+ *            unmatched closing bracket first is a comprehension filter or a
+ *            ternary on a continuation line, not a statement, and is skipped.
+ *   java/go: any `\bif\b` (an `else if` is enumerated too): read to the matched
+ *            `)` (paren form) or to the opening `{` / end of line (brace form).
+ */
+function enumerateIfConditions(bodyText: string, lang: "python" | "java" | "go"): string[] {
+  const conds: string[] = [];
+  const n = bodyText.length;
+  if (lang === "python") {
+    const re = /(?:^|\n)[ \t]*if\b/g;
+    let m: RegExpExecArray | null = re.exec(bodyText);
+    while (m !== null) {
+      // Start scanning right after the matched `if`.
+      const ifEnd = m.index + m[0].length;
+      let depth = 0;
+      let end = -1;
+      let enclosed = false;
+      for (let i = ifEnd; i < n; i++) {
+        const c = bodyText[i];
+        if (c === "(" || c === "[" || c === "{") depth += 1;
+        else if (c === ")" || c === "]" || c === "}") {
+          // A closer with nothing open: this line-leading `if` sits inside an
+          // enclosing bracket (comprehension filter / ternary), not a statement.
+          if (depth === 0) {
+            enclosed = true;
+            break;
+          }
+          depth -= 1;
+        } else if (c === ":" && depth === 0) {
+          end = i;
+          break;
+        }
+      }
+      if (enclosed) {
+        re.lastIndex = ifEnd;
+      } else {
+        conds.push(bodyText.slice(ifEnd, end === -1 ? n : end).trim());
+        re.lastIndex = end === -1 ? n : end;
+      }
+      m = re.exec(bodyText);
+    }
+  } else {
+    // java / go: `if` followed by either `(cond)` or a brace-form `cond {`.
+    const re = /\bif\b/g;
+    let m: RegExpExecArray | null = re.exec(bodyText);
+    while (m !== null) {
+      let i = m.index + 2;
+      while (i < n && (bodyText[i] === " " || bodyText[i] === "\t")) i++;
+      let cond = "";
+      if (bodyText[i] === "(") {
+        // paren form: read to the matched close paren.
+        let depth = 0;
+        const start = i + 1;
+        for (; i < n; i++) {
+          const c = bodyText[i];
+          if (c === "(") depth += 1;
+          else if (c === ")") {
+            depth -= 1;
+            if (depth === 0) break;
+          }
+        }
+        cond = bodyText.slice(start, i).trim();
+      } else {
+        // brace form (go `if cond {`): read to the opening brace or newline.
+        let j = i;
+        while (j < n && bodyText[j] !== "{" && bodyText[j] !== "\n") j++;
+        cond = bodyText.slice(i, j).trim();
+      }
+      conds.push(cond);
+      re.lastIndex = i + 1;
+      m = re.exec(bodyText);
+    }
+  }
+  return conds;
+}
+
+/** Count the `if` conditions of `bodyText` that are non-empty and not guards. */
+function countDomainConditionals(bodyText: string, lang: "python" | "java" | "go"): number {
+  const isDomain = (cond: string): boolean => cond.length > 0 && !isGuardCondition(cond);
+  // One point per surviving condition text.
+  const survivors = enumerateIfConditions(bodyText, lang).filter(isDomain);
+  return survivors.length;
+}
+
+/**
+ * Count binary arithmetic operators (`+ - * / %`, plus `**` and `//`) in
+ * `bodyText`. The Python counts each binary-operator AST node whose text
+ * contains one of `+ - * / %`; augmented assignments (`+=`, `-=`) are a
+ * DIFFERENT node type and comparison-only operators (`>`, `<`) never count.
+ * Text reproduction: an operator counts only when an operand ends on its left
+ * and an operand starts on its right.
+ *
+ * DIVERGENCE (magnitude only, never the zero/non-zero boundary the kernel
+ * reads): the AST counts one node per binary expression, so `a + b * 2` is 2
+ * nodes. A flat text scan also finds 2 operators here, but deeply nested or
+ * unusual expressions can differ in COUNT.
+ *
+ * @param bodyText  source text to scan; string literals are expected to be
+ *                  masked to the `0` operand sentinel already — TODO confirm
+ *                  against the caller.
+ * @returns number of operators recognised as binary arithmetic.
+ */
+function countArithmeticOps(bodyText: string): number {
+  let count = 0;
+  const ops = new Set(["+", "-", "*", "/", "%"]);
+  for (let i = 0; i < bodyText.length; i++) {
+    const ch = bodyText[i] ?? "";
+    if (!ops.has(ch)) continue;
+    const next = bodyText[i + 1] ?? "";
+    const prev = bodyText[i - 1] ?? "";
+    // Skip the second char of a comparison/assign operator (`=*`, `<*`, …) and
+    // augmented assignment (`*=`).
+    if (prev === "=" || prev === "<" || prev === ">" || prev === "!") continue;
+    if (next === "=") continue;
+    // Go/C pointer-type / deref / splat / unary `*T`: the operand is ATTACHED on
+    // the right (`*pendingCall`, `*testing.T`, `*args`, `**kwargs`) and the left
+    // is a SPACE (`t *testing.T`, `return *p`) or a TYPE-bracket `]`/`)`/`}`
+    // (`[]*Task`, `map[K]*V`). For a `**` pair the attachment is judged on the
+    // char AFTER the pair, so a spaced power `a ** b` is not mistaken for a
+    // pointer/splat and still reaches the doubled-operator gate below.
+    if (ch === "*") {
+      const afterStars = next === "*" ? (bodyText[i + 2] ?? "") : next;
+      const attachedRightIdent = /[A-Za-z_$([&*]/.test(afterStars);
+      const prefixLeft =
+        prev === " " || prev === "\t" || prev === "]" || prev === ")" || prev === "}";
+      if (attachedRightIdent && prefixLeft) continue;
+    }
+    // Doubled operators: `**`/`//` count once IF binary; `++`/`--` never. Decide
+    // by the operand gate on the PAIR (left of first char, right of second).
+    if ((ch === "+" || ch === "-" || ch === "*" || ch === "/") && next === ch) {
+      if (ch === "*" || ch === "/") {
+        const rightOk = isOperandStart(nextNonSpace(bodyText, i + 1));
+        if (hasLeftOperand(bodyText, i) && rightOk) count += 1;
+      }
+      i += 1;
+      continue;
+    }
+    // Binary gate: an operand ends on the left (and is not a prefix keyword such
+    // as `return`) and one starts on the right. The string sentinel `0` reads as
+    // an operand, so `name + "lit"` (→ `name + 0`) counts; `(*T)` / `, *args` do not.
+    if (hasLeftOperand(bodyText, i) && isOperandStart(nextNonSpace(bodyText, i))) {
+      count += 1;
+    }
+  }
+  return count;
+}
+
+/** Keywords after which `+ - * /` is a PREFIX operator (`return -1`), never binary. */
+const PREFIX_KEYWORD_RE = /^(?:return|yield|case|and|or|not|if|elif|else|while|assert|await)$/;
+/** True when a real operand — not a prefix keyword — ends just left of index `i`. */
+function hasLeftOperand(s: string, i: number): boolean {
+  if (!isOperandBoundary(prevNonSpace(s, i))) return false;
+  let wordEnd = i;
+  while (wordEnd > 0 && (s[wordEnd - 1] === " " || s[wordEnd - 1] === "\t")) wordEnd--;
+  let wordStart = wordEnd;
+  while (wordStart > 0 && /[\w$]/.test(s[wordStart - 1] ?? "")) wordStart--;
+  return !PREFIX_KEYWORD_RE.test(s.slice(wordStart, wordEnd));
+}
+function prevNonSpace(s: string, i: number): string {
+  for (let j = i - 1; j >= 0; j--) {
+    const c = s[j] ?? "";
+    if (c !== " " && c !== "\t") return c;
+  }
+  return "";
+}
+function nextNonSpace(s: string, i: number): string {
+  for (let j = i + 1; j < s.length; j++) {
+    const c = s[j] ?? "";
+    if (c !== " " && c !== "\t") return c;
+  }
+  return "";
+}
+function isOperandBoundary(c: string): boolean {
+  return /[A-Za-z0-9_$)\]'"`]/.test(c);
+}
+function isOperandStart(c: string): boolean {
+  return /[A-Za-z0-9_$('"`]/.test(c);
+}
+
+/**
+ * Count raised domain exceptions. The Python visits each `raise`/`throw` node,
+ * tokenises its text, and counts ONE per statement when a token ends in a
+ * domain suffix and is NOT in the stdlib stoplist. Text reproduction: take the
+ * text after each `raise`/`throw` keyword up to the end of the line or the next
+ * `;`, and apply the same per-statement rule to it.
+ *
+ * @param bodyText  source text to scan.
+ * @param lang      source language; `go` always yields 0.
+ * @returns number of `raise`/`throw` statements naming a domain exception.
+ */
+function countDomainExceptions(bodyText: string, lang: "python" | "java" | "go"): number {
+  if (lang === "go") return 0; // Go has no raise/throw (raise_node = "").
+  const keyword = "raise|throw";
+  const stmtRe = new RegExp(`\\b(?:${keyword})\\b([^\\n;]*)`, "g");
+  // A token qualifies when, with trailing `(`, `)`, `;` stripped, it is non-empty,
+  // ends in a domain suffix and is not a stdlib exception name.
+  const qualifies = (rawTok: string): boolean => {
+    const tok = rawTok.trim().replace(/[();]+$/, "");
+    return tok.length > 0 && endsWithDomainSuffix(tok) && !STDLIB_EXC.has(tok);
+  };
+  let total = 0;
+  for (let hit = stmtRe.exec(bodyText); hit !== null; hit = stmtRe.exec(bodyText)) {
+    // Mirror Python: '(' and the word 'new' become spaces, then split on ws.
+    const tokens = (hit[1] ?? "").replace(/\(/g, " ").replace(/\bnew\b/g, " ").split(/\s+/);
+    if (tokens.some(qualifies)) total += 1;
+  }
+  return total;
+}
+
+/** True when `tok` ends with at least one entry of `DOMAIN_EXC_SUFFIXES`. */
+function endsWithDomainSuffix(tok: string): boolean {
+  // Materialise the suffix collection so a single `some` expresses the test.
+  const suffixes = [...DOMAIN_EXC_SUFFIXES];
+  return suffixes.some((suffix) => tok.endsWith(suffix));
+}
+
+/**
+ * Result of the single faithful assignment scan (Python lines 708-722): every
+ * assignment statement adds to `assignStmts`; one that passes the attr->attr
+ * gate (`"." in atext AND "(" not in atext AND no arithmetic op`) is then either
+ * a state transition (LHS carries a {@link STATE_FIELD_TOKENS} token) or a DTO
+ * attr->attr assign — never both.
+ */
+interface AssignmentScan {
+  /** All assignment statements (Python `assign_stmts`). */
+  readonly assignStmts: number;
+  /** attr->attr assignments whose LHS is NOT a state field (Python `attr_to_attr`). */
+  readonly attrToAttr: number;
+  /** attr->attr assignments whose LHS IS a state field (Python `n_state_transitions`). */
+  readonly nStateTransitions: number;
+}
+
+/**
+ * Scan assignment statements once and reproduce the Python assignment branch
+ * EXACTLY (och_bizlogic_extract.py lines 708-722):
+ *
+ * ```python
+ * elif assign_node and t == assign_node:
+ *     assign_stmts += 1
+ *     atext = node_text(n, src)
+ *     if "." in atext and "(" not in atext and not any(
+ *         op in atext for op in ("+", "-", "*", "/", "%")):
+ *         lhs = atext.split("=")[0]
+ *         if any(s in lhs.lower() for s in STATE_FIELD_TOKENS):
+ *             f.n_state_transitions += 1
+ *         else:
+ *             attr_to_attr += 1
+ * ```
+ *
+ * The Python tests are pure string predicates on the assignment node's text, so
+ * a statement-based scan that applies the SAME predicates is faithful: python
+ * is scanned per logical statement, java/go per physical line. `assign_stmts`
+ * counts EVERY assignment (the `dto_mapper_ratio` denominator); the attr->attr
+ * split feeds the numerator and the state-machine signal. A plain
+ * `self.state = 5` has a `.`, no `(`, no arithmetic → a state transition.
+ */
+function scanAssignments(bodyText: string, lang: "python" | "java" | "go"): AssignmentScan {
+  let assignStmts = 0;
+  let attrToAttr = 0;
+  let nStateTransitions = 0;
+  // Two aligned, length-preserving views (offsets match — same length):
+  //   - `locate`: strings AND comments masked — used to FIND the assignment `=`,
+  //     so a `=` inside a docstring/string (`methods=["GET"]`) or comment is NOT
+  //     mistaken for an assignment (a string is never an `assignment` node).
+  //   - `gate`: comments masked, strings RAW — used for the attr->attr gate
+  //     predicates, because the Python `atext = node_text(assign)` INCLUDES the
+  //     string RHS, so a `(`/`-`/`+` inside a string literal still disqualifies
+  //     attr->attr (`"(" not in atext` / no-arith).
+  const locate = maskStringsAndComments(bodyText);
+  const gate = maskCommentsOnly(bodyText);
+  // Statement units to scan:
+  //   - python: LOGICAL statements joined across `()[]{}` continuations, so a
+  //     multi-line call's `kw=value` arguments are ONE call statement, not
+  //     per-line assignments. Python has no block braces (indentation-scoped),
+  //     so `{}` only ever delimits dict/set literals → safe to treat as a
+  //     continuation.
+  //   - java/go: PHYSICAL lines. These languages use `{}` for BOTH blocks and
+  //     composite literals, so a cross-line join would swallow whole `for`/`if`
+  //     blocks; per-line scanning matches their one-assignment-per-line idiom
+  //     (`x[i] = &T{` reads as the attr->attr assignment the AST sees, and
+  //     struct-field `key: value` lines carry no `=`).
+  const spans = lang === "python" ? splitLogicalStatements(locate) : physicalLineSpans(locate);
+  for (const [s, e] of spans) {
+    const locStmt = locate.slice(s, e);
+    let eq = assignmentEqIndex(locStmt);
+    // Python class/var ANNOTATION without value (`products: repository.X`): a
+    // tree-sitter-python `assignment` node fires even with no `=`. Treat a
+    // statement-level `<name>: <type>` (a depth-0 `:` whose LHS is a plain
+    // dotted identifier, not a compound-statement keyword) as an assignment, with
+    // the `:` standing in as the LHS/RHS split point for the gate.
+    if (eq === -1 && lang === "python") {
+      eq = pythonAnnotationColonIndex(locStmt);
+    }
+    if (eq === -1) continue;
+    assignStmts += 1;
+    const gateStmt = gate.slice(s, e);
+    if (!gateStmt.includes(".")) continue;
+    if (gateStmt.includes("(")) continue;
+    if (/[+\-*/%]/.test(gateStmt)) continue;
+    const lhsLow = gateStmt.slice(0, eq).toLowerCase();
+    let isState = false;
+    for (const st of STATE_FIELD_TOKENS) {
+      if (lhsLow.includes(st)) {
+        isState = true;
+        break;
+      }
+    }
+    if (isState) nStateTransitions += 1;
+    else attrToAttr += 1;
+  }
+  return { assignStmts, attrToAttr, nStateTransitions };
+}
+
+/** Python keywords whose `:` opens a suite (or a lambda body), NOT a type annotation. */
+const PY_COMPOUND_KEYWORDS: ReadonlySet<string> = new Set([
+  "if",
+  "elif",
+  "else",
+  "for",
+  "while",
+  "with",
+  "try",
+  "except",
+  "finally",
+  "def",
+  "class",
+  "match",
+  "case",
+  "async",
+  "lambda",
+]);
+
+/**
+ * Locate the `:` of a value-less Python ANNOTATION statement (`name: Type`).
+ * Only the first `:` found at bracket depth 0 is considered, so a `:` inside
+ * `()[]{}` (slice / dict / call) never qualifies. The text before that `:` must
+ * be one plain, optionally dotted identifier (`products`, `self.products`) whose
+ * first component is not a compound-statement keyword — `else:`, `try:` and
+ * `lambda:` are rejected, as is anything with spaces such as `if x:`.
+ * Returns the index of that `:`, or -1 when the statement is not an annotation.
+ */
+function pythonAnnotationColonIndex(stmt: string): number {
+  let nesting = 0;
+  let colonAt = -1;
+  for (let pos = 0; pos < stmt.length && colonAt === -1; pos++) {
+    const ch = stmt[pos] ?? "";
+    if (ch === "(" || ch === "[" || ch === "{") nesting += 1;
+    else if (ch === ")" || ch === "]" || ch === "}") nesting -= 1;
+    else if (ch === ":" && nesting === 0) colonAt = pos;
+  }
+  if (colonAt === -1) return -1;
+  // The annotation target must be a bare dotted name: no operators, commas or inner spaces.
+  const target = stmt.slice(0, colonAt).trim();
+  if (!/^[A-Za-z_$][\w$]*(?:\s*\.\s*[A-Za-z_$][\w$]*)*$/.test(target)) return -1;
+  const leadingName = target.split(".")[0]?.trim() ?? "";
+  return PY_COMPOUND_KEYWORDS.has(leadingName) ? -1 : colonAt;
+}
+
+/** Half-open `[start, end)` span per `\n`-delimited line of `src`; the last line always yields one. */
+function physicalLineSpans(src: string): Array<[number, number]> {
+  const result: Array<[number, number]> = [];
+  let lineStart = 0;
+  let newlineAt = src.indexOf("\n");
+  while (newlineAt !== -1) {
+    result.push([lineStart, newlineAt]);
+    lineStart = newlineAt + 1;
+    newlineAt = src.indexOf("\n", lineStart);
+  }
+  result.push([lineStart, src.length]);
+  return result;
+}
+
+/**
+ * PYTHON only: cut `src` into logical-statement spans `[start, end)`. A statement
+ * ends at a newline or `;` seen at bracket depth 0; a newline directly preceded
+ * by `\` is a line continuation and does not end it. Newlines inside
+ * `()`/`[]`/`{}` keep the statement open, so `f(\n  kw=val,\n)` stays one span
+ * and its `kw=val` lines are never mistaken for depth-0 assignments.
+ */
+function splitLogicalStatements(src: string): Array<[number, number]> {
+  const result: Array<[number, number]> = [];
+  let nesting = 0;
+  let stmtStart = 0;
+  for (let pos = 0; pos < src.length; pos++) {
+    const ch = src[pos] ?? "";
+    if (ch === "(" || ch === "[" || ch === "{") nesting += 1;
+    else if (ch === ")" || ch === "]" || ch === "}") nesting = Math.max(0, nesting - 1);
+    else if (nesting === 0 && (ch === "\n" || ch === ";")) {
+      const continued = ch === "\n" && src[pos - 1] === "\\";
+      if (!continued) {
+        result.push([stmtStart, pos]);
+        stmtStart = pos + 1;
+      }
+    }
+  }
+  if (stmtStart < src.length) result.push([stmtStart, src.length]);
+  return result;
+}
+
+/** Blank `#` / `//` line comments and C-style block comments to spaces (length and
+ *  newlines preserved), leaving string literals intact. Used by the assignment scan,
+ *  whose Python counterpart reads the assignment node text (comments excluded, string
+ *  RHS included). NOTE(review): no language gate — Python `a // b` is blanked too. */
+function maskCommentsOnly(src: string): string {
+  const out = src.split(""); // one cell per UTF-16 unit, so offsets match `src`
+  let i = 0;
+  const n = src.length;
+  const blank = (from: number, to: number) => {
+    for (let k = from; k < to && k < n; k++) if (out[k] !== "\n") out[k] = " ";
+  };
+  while (i < n) {
+    const c = src[i] ?? "";
+    const c2 = src.slice(i, i + 2);
+    if (c === "#" || c2 === "//") {
+      let j = i;
+      while (j < n && src[j] !== "\n") j++;
+      blank(i, j);
+      i = j;
+      continue;
+    }
+    if (c2 === "/*") {
+      let j = i + 2;
+      while (j < n && src.slice(j, j + 2) !== "*/") j++;
+      const end = Math.min(n, j + 2); // an unterminated block is blanked through EOF
+      blank(i, end);
+      i = end;
+      continue;
+    }
+    // Skip over string interiors so a comment marker INSIDE a string is not treated
+    // as a comment start (the string content is kept for the assignment gate).
+    if (c === '"' || c === "'" || c === "`") {
+      const q = c;
+      let j = i + 1;
+      while (j < n) {
+        const cj = src[j] ?? "";
+        if (cj === "\\") {
+          j += 2; // skip the backslash and the character it escapes
+          continue;
+        }
+        if (cj === q) break;
+        if ((q === '"' || q === "'") && cj === "\n") break; // backticks may span lines
+        j++;
+      }
+      i = Math.min(n, j + 1); // resume after the closing quote (or the newline / EOF)
+      continue;
+    }
+    i++;
+  }
+  return out.join("");
+}
+
+/**
+ * Number of `return` keywords in `bodyText` (the Python oracle's `n_returns`,
+ * one per `return_statement` node; Go spells it `return` too). Matches on word
+ * boundaries, and feeds the getter/setter gate (`n_returns <= 1`).
+ */
+function countReturns(bodyText: string): number {
+  const hits = bodyText.match(/\breturn\b/g) ?? [];
+  return hits.length;
+}
+
+/**
+ * Total conditional count (Python `n_total_conditionals`: one per `if_statement`
+ * node, guards INCLUDED) — unlike {@link countDomainConditionals}, which keeps
+ * only the NON-guard subset. Feeds the getter/setter gate, which requires
+ * `n_total_conditionals == 0`.
+ *   python : `if <cond>:`            (NOT `elif`)
+ *   java/go: `if (<cond>)` / `if ... {`
+ */
+function countTotalConditionals(bodyText: string, lang: "python" | "java" | "go"): number {
+  const conditions = enumerateIfConditions(bodyText, lang);
+  return conditions.length;
+}
+
+/**
+ * Index of the top-level `=` that makes a line a statement-level ASSIGNMENT, or
+ * -1. Mirrors the Python `assignment` AST node, which is distinct from:
+ *   - keyword arguments / default params (`f(x=1)`) — the `=` is INSIDE parens,
+ *     so we require paren/bracket depth 0;
+ *   - comparisons (`==`, `!=`, `<=`, `>=`) and walrus (`:=`), arrow (`=>`);
+ *   - augmented assignments (`+= -= *= /= %= @= **= //= &= |= ^= >>= <<=`),
+ *     which are an `augmented_assignment` node in Python, NOT an `assignment`.
+ * Returns the depth-0 plain-`=` index (the LHS/RHS split point).
+ */
+function assignmentEqIndex(line: string): number {
+  let depth = 0;
+  for (let i = 0; i < line.length; i++) {
+    const c = line[i] ?? "";
+    if (c === "(" || c === "[" || c === "{") depth += 1;
+    else if (c === ")" || c === "]" || c === "}") depth -= 1;
+    else if (c === "=" && depth === 0) {
+      const before = line[i - 1] ?? "";
+      const after = line[i + 1] ?? "";
+      if (after === "=" || after === ">") continue; // ==, =>
+      if (before === "=" || before === "!" || before === "<" || before === ">" || before === ":")
+        continue; // ==, !=, <=, >=, :=
+      // augmented assignment `<op>=` (`>>=` / `<<=` were caught by the `<` / `>` check above).
+      if (
+        before === "+" ||
+        before === "-" ||
+        before === "*" ||
+        before === "/" ||
+        before === "%" ||
+        before === "&" ||
+        before === "|" ||
+        before === "^" ||
+        before === "@"
+      )
+        continue;
+      return i;
+    }
+  }
+  return -1;
+}
+
+// ── public entrypoint ───────────────────────────────────────────────────────
+
+/**
+ * Compute the {@link classifyPlumbing} inputs (`nSerializationCalls`, `nDomainSignals`,
+ * `nPlumbingSignals`, `isOrmModel`), faithful to `och_bizlogic_extract.py`. Pure; no I/O.
+ */
+export function computePlumbingFeatures(args: ComputePlumbingFeaturesArgs): PlumbingFeatureCounts {
+  const { symbolName, kind, bodyText, classHeadText, lang } = args;
+  const isClass = kind.toLowerCase() === "class";
+
+  // The scan body has the leading definition header (`def f(...):` /
+  // `func (r *R) F(...) {`) blanked so the symbol's OWN name+params are not
+  // enumerated as a call and the signature's pointer-`*` / param `(` are not
+  // scanned as body code. The Python AST counts call/binary/if/return nodes
+  // only inside the body; the signature is a separate parameter/type subtree.
+  const scanBody = stripDefinitionHeader(bodyText, lang);
+
+  // Go `type X interface { … }` / `type X struct { … }` bodies are field &
+  // method-spec declarations with NO call / binary / if / return / assignment
+  // AST nodes, so the Python's body-derived counts are all 0 (verified across
+  // the corpus; only is_getter_setter can fire on a short type). A flat text
+  // scan would read interface method specs like `Info(msg ...)` as calls
+  // (matching observ markers), so the body scans are zeroed for Go types;
+  // `is_getter_setter` still evaluates below with nCalls = nReturns = 0.
+  // NOTE(review): this gate (and the ORM check) only sees kind "class" — confirm
+  // Go struct/interface definitions arrive as "Class", not "Struct"/"Interface".
+  const goTypeDecl = lang === "go" && isClass;
+
+  // STRUCTURAL/CALL scans run on the MASKED body so an operator / `if` / `raise`
+  // keyword OR a phantom `ident (` chain inside a string or comment (docstring
+  // reST like `(access ``json``, …)`) is not miscounted — the Python walks AST
+  // nodes, so string/comment text never appears as a structural or call node.
+  // String literals are replaced by an operand sentinel `0`, so a real call's
+  // callee/method name (where every serialization/observability marker lives)
+  // survives intact (`json.dumps(0)`), while string ARGUMENTS cannot manufacture
+  // a marker hit or a phantom call.
+  const maskedBody = maskStringsAndComments(scanBody);
+
+  // --- nSerializationCalls + observability calls + n_calls -------------------
+  // Walk each call site once; mirror the Python per-call marker matching.
+  let nSerializationCalls = 0;
+  let nObservCalls = 0;
+  let nCalls = 0;
+  if (!goTypeDecl) {
+    for (const site of enumerateCallSites(maskedBody, scanBody)) {
+      nCalls += 1;
+      if (markerHit(site.headText, SERIALIZATION_MARKERS)) nSerializationCalls += 1;
+      if (markerHit(site.headText, OBSERV_MARKERS)) nObservCalls += 1;
+    }
+  }
+
+  // --- positive domain signals ----------------------------------------------
+  const nDomainConditionals = goTypeDecl ? 0 : countDomainConditionals(maskedBody, lang);
+  const nArithmeticOps = goTypeDecl ? 0 : countArithmeticOps(maskedBody);
+  const nDomainExceptions = goTypeDecl ? 0 : countDomainExceptions(maskedBody, lang);
+  const { assignStmts, attrToAttr, nStateTransitions } = goTypeDecl
+    ? { assignStmts: 0, attrToAttr: 0, nStateTransitions: 0 }
+    : scanAssignments(scanBody, lang);
+  const nDomainSignals =
+    nDomainConditionals + nArithmeticOps + nDomainExceptions + nStateTransitions;
+
+  // --- getter/setter + dto_mapper_ratio (Python lines 718-722, 724-729) ------
+  // `loc` = endLine - startLine + 1; the phase slices the body without a
+  // trailing newline, so its physical line count equals `loc` (a trailing empty
+  // line from test snippets is dropped). `return` / `if` are counted on the
+  // MASKED body so keyword text inside strings or comments is never counted.
+  const loc = bodyLineCount(bodyText);
+  const nReturns = goTypeDecl ? 0 : countReturns(maskedBody);
+  const nTotalConditionals = goTypeDecl ? 0 : countTotalConditionals(maskedBody, lang);
+
+  // dto_mapper_ratio = round(attr_to_attr / assign_stmts, 3) when assign_stmts>0.
+  const dtoMapperRatio = assignStmts > 0 ? round3(attrToAttr / assignStmts) : 0.0;
+
+  // is_getter_setter (Python operator precedence: A and B and (C or (D and E))):
+  //   loc<=4 AND n_total_conditionals==0 AND
+  //   (name startswith get/set/is/has OR (n_returns<=1 AND n_calls==0))
+  // then the inner `if loc<=3` actually sets it — so loc<=3 is the binding bound.
+  const lowName = symbolName.toLowerCase();
+  let isGetterSetter = false;
+  if (
+    loc <= 4 &&
+    nTotalConditionals === 0 &&
+    (lowName.startsWith("get") ||
+      lowName.startsWith("set") ||
+      lowName.startsWith("is") ||
+      lowName.startsWith("has") ||
+      (nReturns <= 1 && nCalls === 0))
+  ) {
+    if (loc <= 3) isGetterSetter = true;
+  }
+
+  // --- ORM-model class-head detection ---------------------------------------
+  let isOrmModel = false;
+  if (isClass) {
+    const head = classHeadText ?? "";
+    const { className, baseIdents, annotBlob } = classNameAndBases(head, lang);
+    isOrmModel = classHeadIsOrmModel(className, baseIdents, annotBlob);
+  }
+
+  // --- negative plumbing signals (EXACT Python n_plumbing_signals) -----------
+  // och_bizlogic_extract.py lines 767-769:
+  //   n_plumbing_signals = n_serialization_calls + n_observ_calls
+  //                        + (1 if is_getter_setter else 0)
+  //                        + (1 if dto_mapper_ratio >= 0.5 else 0)
+  // Qualified-persistence / raw-SQL / bootstrap are NOT part of this field — they
+  // feed touches_persistence / is_framework_bootstrap, which the kernel ignores.
+  const nPlumbingSignals =
+    nSerializationCalls + nObservCalls + (isGetterSetter ? 1 : 0) + (dtoMapperRatio >= 0.5 ? 1 : 0);
+
+  return { nSerializationCalls, nDomainSignals, nPlumbingSignals, isOrmModel };
+}
+
+/** Round to 3 decimals with `Math.round` (stand-in for Python `round(x, 3)`; fine for our use). */
+function round3(x: number): number {
+  return Math.round(x * 1e3) / 1e3;
+}
+
+/**
+ * Number of physical lines in a sliced body, i.e. the Python `loc`
+ * (`endLine - startLine + 1`). Bodies from the business-logic phase come from
+ * `lines.slice(startLine-1, endLine).join("\n")` and carry NO trailing newline,
+ * so a plain line count already equals `loc`. Test/standalone snippets often do
+ * end in "\n"; that one trailing empty segment is not counted, keeping `loc`
+ * aligned with the Python AST node span. An empty body counts as 0 lines.
+ */
+function bodyLineCount(bodyText: string): number {
+  if (bodyText === "") return 0;
+  const newlines = bodyText.split("\n").length - 1;
+  // A final "\n" terminates the last line rather than starting another one.
+  return bodyText.endsWith("\n") ? newlines : newlines + 1;
+}
diff --git a/packages/ingestion/src/pipeline/orchestrator.test.ts b/packages/ingestion/src/pipeline/orchestrator.test.ts
index 8b8d9e67..d247a85f 100644
--- a/packages/ingestion/src/pipeline/orchestrator.test.ts
+++ b/packages/ingestion/src/pipeline/orchestrator.test.ts
@@ -52,6 +52,10 @@ describe("runIngestion (end-to-end)", () => {
         "coverage",
         "markdown",
         "parse",
+        // `business-logic` depends on parse + scan; once parse completes it is
+        // ready alongside complexity/orm/routes and the alphabetic tiebreak
+        // ("business-logic" < "complexity") lands it first.
+        "business-logic",
         "complexity",
         "orm",
         "routes",
diff --git a/packages/ingestion/src/pipeline/phases/business-logic.ts b/packages/ingestion/src/pipeline/phases/business-logic.ts
new file mode 100644
index 00000000..714abde5
--- /dev/null
+++ b/packages/ingestion/src/pipeline/phases/business-logic.ts
@@ -0,0 +1,298 @@
+/**
+ * Business-logic phase — annotate Function / Method / Constructor / Class /
+ * Interface / Struct nodes with two advisory concern tags:
+ *
+ *   - `likelyPlumbing`   (precision-first, ~0.94): the symbol is almost
+ *     certainly plumbing (serialization, DTO mapping, transport, DI wiring).
+ *   - `candidateBusiness` (recall-first, ~0.93): the recall-first complement —
+ *     everything the sieve did NOT classify as plumbing is a "look here for
+ *     domain logic" candidate.
+ *
+ * The user gets both tags from `codehub analyze` with no query, no labels, no
+ * embeddings. They land in `nodes.payload` and are reachable via SQLite JSON1
+ * (`payload->>'$.candidateBusiness'`).
+ *
+ * ## How it works
+ *
+ *   1. For each in-scope definition, slice its source body from the scanned
+ *      file (start/end lines from the parse phase).
+ *   2. {@link computePlumbingFeatures} reduces the body (+ class head) to the
+ *      small deterministic feature vector the sieve consumes.
+ *   3. {@link classifyPlumbing} / {@link classifyBusinessCandidate} (the merged,
+ *      validated `@opencodehub/analysis` kernels) produce the two tags.
+ *   4. The node is re-added with the tags set; {@link KnowledgeGraph.addNode}
+ *      keeps the entry with more defined fields, so the annotated version wins
+ *      (same merge contract the complexity phase relies on).
+ *
+ * ## Validated languages only
+ *
+ * The sieve's precision floor was measured on Python, Java, and Go. Other
+ * languages are SKIPPED (no tag emitted) rather than given an unbacked verdict
+ * — the `VALIDATED_LANGS` set below gates the per-file loop.
+ *
+ * ## Determinism
+ *
+ * Files iterated in sorted order; definitions in (startLine, qualifiedName)
+ * order — identical to the complexity phase. {@link computePlumbingFeatures}
+ * and both kernels are pure, so the tags are byte-stable across runs and safe
+ * under the `graphHash` contract.
+ */
+
+import { promises as fs } from "node:fs";
+import {
+  classifyBusinessCandidate,
+  classifyPlumbing,
+  type PlumbingFeatures,
+} from "@opencodehub/analysis";
+import type { GraphNode, NodeKind } from "@opencodehub/core-types";
+import { computePlumbingFeatures } from "../../extract/business-logic-features.js";
+import type { LanguageId } from "../../providers/types.js";
+import type { PipelineContext, PipelinePhase } from "../types.js";
+import { PARSE_PHASE_NAME, type ParseOutput } from "./parse.js";
+import { SCAN_PHASE_NAME, type ScanOutput } from "./scan.js";
+
+export const BUSINESS_LOGIC_PHASE_NAME = "business-logic" as const; // phase name; also labels its progress events
+
+export interface BusinessLogicOutput {
+  /** Symbols processed; each gets BOTH tags written (`true` or `false`) on its node. */
+  readonly symbolsTagged: number;
+  /** Processed symbols whose `likelyPlumbing` tag is `true`. */
+  readonly plumbing: number;
+  /** Processed symbols whose `candidateBusiness` tag is `true`. */
+  readonly candidates: number;
+  /** Defs skipped (unreadable file / no graph node); unvalidated-language files are NOT counted. */
+  readonly skipped: number;
+}
+
+/** Languages the sieve is validated on (the analysis-layer string set as ingestion
+ *  {@link LanguageId}s). Keep in sync with {@link SieveLang}: the per-file gate casts to it. */
+const VALIDATED_LANGS: ReadonlySet<LanguageId> = new Set<LanguageId>(["python", "java", "go"]);
+
+/** The lang string the extractor expects; `runBusinessLogic` casts to it once the gate passes. */
+type SieveLang = "python" | "java" | "go";
+
+/** Kinds the sieve tags: callables plus the class-like kinds (the ORM-model signal is
+ *  class-head based). Filters the parse definitions, the node index, and `withTags`. */
+const TAGGABLE_KINDS: ReadonlySet<NodeKind> = new Set<NodeKind>([
+  "Function",
+  "Method",
+  "Constructor",
+  "Class",
+  "Interface",
+  "Struct",
+]);
+
+export const businessLogicPhase: PipelinePhase<BusinessLogicOutput> = {
+  name: BUSINESS_LOGIC_PHASE_NAME,
+  deps: [PARSE_PHASE_NAME, SCAN_PHASE_NAME], // both outputs are required by run()
+  run: async (ctx, deps) => {
+    const parseOutput = deps.get(PARSE_PHASE_NAME) as ParseOutput | undefined;
+    const scanOutput = deps.get(SCAN_PHASE_NAME) as ScanOutput | undefined;
+    if (parseOutput === undefined) {
+      throw new Error("business-logic: parse output missing from dependency map");
+    }
+    if (scanOutput === undefined) {
+      throw new Error("business-logic: scan output missing from dependency map");
+    }
+    return runBusinessLogic(ctx, parseOutput, scanOutput);
+  },
+};
+
+export async function runBusinessLogic(
+  ctx: PipelineContext,
+  parse: ParseOutput,
+  scan: ScanOutput,
+): Promise<BusinessLogicOutput> {
+  const absByRel = new Map<string, string>();
+  const langByRel = new Map<string, LanguageId>();
+  for (const f of scan.files) {
+    if (f.language === undefined) continue; // no language → can never pass the gate below
+    absByRel.set(f.relPath, f.absPath);
+    langByRel.set(f.relPath, f.language);
+  }
+
+  // Index taggable graph nodes by the same 4-tuple the complexity phase uses,
+  // so each definition resolves with one Map.get instead of a full-graph scan.
+  const nodeIndex = buildNodeIndex(ctx);
+
+  let symbolsTagged = 0;
+  let plumbing = 0;
+  let candidates = 0;
+  let skipped = 0;
+
+  const files = [...parse.definitionsByFile.keys()].sort();
+  const sourceCache = new Map<string, string[] | null>(); // abs path → lines, or null after a failed read
+  // Per file: gate on language, keep the taggable defs, load the source, then tag each def.
+  for (const filePath of files) {
+    const lang = langByRel.get(filePath);
+    if (lang === undefined || !VALIDATED_LANGS.has(lang)) continue; // unvalidated → skip silently (not counted)
+    const sieveLang = lang as SieveLang;
+
+    const defs = (parse.definitionsByFile.get(filePath) ?? []).filter((d) =>
+      TAGGABLE_KINDS.has(d.kind),
+    );
+    if (defs.length === 0) continue;
+
+    const lines = await loadLines(absByRel.get(filePath), sourceCache, ctx, filePath);
+    if (lines === null) {
+      skipped += defs.length; // unreadable file: all of its taggable defs count as skipped
+      continue;
+    }
+
+    // Deterministic order, matching the complexity phase tiebreak.
+    const sorted = [...defs].sort((a, b) => {
+      if (a.startLine !== b.startLine) return a.startLine - b.startLine;
+      return a.qualifiedName < b.qualifiedName ? -1 : a.qualifiedName > b.qualifiedName ? 1 : 0;
+    });
+
+    for (const def of sorted) {
+      const node = nodeIndex.get(nodeKey(def.filePath, def.name, def.kind, def.startLine));
+      if (node === undefined) {
+        skipped += 1; // parse reported a definition the node index has no entry for
+        continue;
+      }
+      const bodyText = sliceBody(lines, def.startLine, def.endLine);
+      const isClassLike = def.kind === "Class" || def.kind === "Interface" || def.kind === "Struct";
+      const classHeadText = isClassLike ? sliceClassHead(lines, def.startLine) : undefined;
+
+      const features: PlumbingFeatures = computePlumbingFeatures({
+        symbolName: def.name,
+        kind: def.kind,
+        bodyText,
+        ...(classHeadText !== undefined ? { classHeadText } : {}),
+        lang: sieveLang,
+      });
+
+      const sieve = classifyPlumbing(features);
+      const candidate = classifyBusinessCandidate(features);
+      // Both booleans are always written, so every processed node carries both tags.
+      ctx.graph.addNode(withTags(node, sieve.likelyPlumbing, candidate.candidateBusiness));
+
+      symbolsTagged += 1;
+      if (sieve.likelyPlumbing) plumbing += 1;
+      if (candidate.candidateBusiness) candidates += 1;
+    }
+  }
+
+  ctx.onProgress?.({
+    phase: BUSINESS_LOGIC_PHASE_NAME,
+    kind: "note",
+    message: `business-logic: tagged ${symbolsTagged} (${plumbing} plumbing, ${candidates} candidates), ${skipped} skipped`,
+  });
+
+  return { symbolsTagged, plumbing, candidates, skipped };
+}
+
+/** Read `abs` as UTF-8 lines, memoized in `cache`; a failed read is warned, cached as null. */
+async function loadLines(
+  abs: string | undefined, // absolute path; undefined yields null without touching the cache
+  cache: Map<string, string[] | null>,
+  ctx: PipelineContext,
+  filePath: string, // used only in the warning message
+): Promise<string[] | null> {
+  if (abs === undefined) return null;
+  if (cache.has(abs)) return cache.get(abs) ?? null;
+  try {
+    const lines = (await fs.readFile(abs, "utf8")).split("\n");
+    cache.set(abs, lines);
+    return lines;
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err); // `err` is unknown: narrow it
+    ctx.onProgress?.({
+      phase: BUSINESS_LOGIC_PHASE_NAME,
+      kind: "warn",
+      message: `business-logic: cannot read ${filePath}: ${reason}`,
+    });
+    cache.set(abs, null);
+    return null;
+  }
+}
+
+/** Text of lines `startLine`..`endLine` (1-based, inclusive); an inverted range yields up to 50 lines. */
+function sliceBody(lines: readonly string[], startLine: number, endLine: number): string {
+  const first = Math.max(0, startLine - 1);
+  const stop = endLine >= startLine ? endLine : first + 50;
+  return lines.slice(first, Math.min(lines.length, stop)).join("\n");
+}
+
+/** Class-head text the ORM-model detector matches against. Two parts:
+ *
+ *   1. PRECEDING annotation / decorator lines. JPA puts `@Entity` /
+ *      `@MappedSuperclass` (and Python `@dataclass` etc.) on the line(s) ABOVE
+ *      the class declaration, while the parse phase's `startLine` points at the
+ *      `class`/`type` keyword. Without scanning upward the entity annotation is
+ *      missed and `isOrmModel` reads false. We walk up at most 10 lines over
+ *      `@…` / comment (`//`, `/*` block, `#`) / blank lines and prepend ONLY the
+ *      `@…` lines, so comment text never reaches the extractor's annotation blob.
+ *   2. The declaration line(s) down to the first `{` or `:` that opens the body
+ *      (the base / superclass list), capped at 4 lines.
+ *
+ * Stops at the body opener; any text after it on that same line is kept. */
+function sliceClassHead(lines: readonly string[], startLine: number): string {
+  const s = Math.max(0, startLine - 1);
+  // Walk upward, collecting ONLY real annotation lines (`@Entity`,
+  // `@MappedSuperclass`, `@dataclass`). Comment and blank lines are stepped
+  // OVER (a Javadoc block or a Python `#` comment can sit between the
+  // annotation and the class) but NOT collected — a Javadoc `@author` /
+  // `@param` tag would otherwise leak into the extractor's annotation blob and
+  // shadow the real ORM annotation, flipping isOrmModel false (the JPA-entity
+  // divergence the parity check caught). Code lines stop the climb.
+  const pre: string[] = [];
+  let inBlockComment = false;
+  for (let i = s - 1; i >= 0 && i >= s - 10; i--) {
+    const line = lines[i] ?? "";
+    const t = line.trim();
+    // Track block-comment boundaries walking upward: a line ending `*/` opens
+    // (from below) a comment region; a line containing `/*` closes it.
+    if (t.endsWith("*/")) inBlockComment = true;
+    const isComment = inBlockComment || /^(?:\/\/|\*|#)/.test(t);
+    if (t.includes("/*")) inBlockComment = false;
+    if (t === "" || isComment) continue; // step over, don't collect
+    if (t.startsWith("@")) {
+      pre.unshift(line); // a real annotation line
+      continue;
+    }
+    break; // hit a code line — stop climbing
+  }
+  const head: string[] = [];
+  for (let i = s; i < Math.min(lines.length, s + 4); i++) {
+    const line = lines[i] ?? "";
+    head.push(line);
+    if (line.includes("{") || line.includes(":")) break;
+  }
+  return [...pre, ...head].join("\n");
+}
+
+function nodeKey(
+  filePath: string,
+  name: string,
+  kind: NodeKind,
+  startLine: number | undefined, // rendered as "undefined" when absent
+): string {
+  return [filePath, name, kind, startLine].map(String).join("\x00"); // NUL-separated 4-tuple
+}
+
+function buildNodeIndex(ctx: PipelineContext): ReadonlyMap<string, GraphNode> {
+  const byKey = new Map<string, GraphNode>();
+  for (const candidate of ctx.graph.nodes()) {
+    if (!TAGGABLE_KINDS.has(candidate.kind)) continue;
+    const { startLine } = candidate as unknown as { readonly startLine?: number };
+    const key = nodeKey(candidate.filePath, candidate.name, candidate.kind, startLine);
+    if (!byKey.has(key)) byKey.set(key, candidate); // first node wins on a key collision
+  }
+  return byKey;
+}
+
+/**
+ * Return a copy of `node` carrying the two advisory tags. Only kinds in
+ * `TAGGABLE_KINDS` — the callable / class-like kinds that carry the
+ * {@link CallableShape} fields — are copied and tagged, which keeps the spread
+ * type-safe (the same narrowing idiom as `withComplexity` in the complexity
+ * phase). Any other kind is returned as the very same object, untouched.
+ */
+function withTags(node: GraphNode, likelyPlumbing: boolean, candidateBusiness: boolean): GraphNode {
+  if (!TAGGABLE_KINDS.has(node.kind)) return node;
+  // Shallow copy: the node object passed in is never mutated.
+  const tagged = { ...node, likelyPlumbing, candidateBusiness };
+  return tagged as GraphNode;
+}
diff --git a/packages/ingestion/src/pipeline/phases/default-set.ts b/packages/ingestion/src/pipeline/phases/default-set.ts
index 21c61cfa..c38e1143 100644
--- a/packages/ingestion/src/pipeline/phases/default-set.ts
+++ b/packages/ingestion/src/pipeline/phases/default-set.ts
@@ -23,6 +23,7 @@
 import type { PipelinePhase } from "../types.js";
 import { accessesPhase } from "./accesses.js";
 import { annotatePhase } from "./annotate.js";
+import { businessLogicPhase } from "./business-logic.js";
 import { cochangePhase } from "./cochange.js";
 import { communitiesPhase } from "./communities.js";
 import { complexityPhase } from "./complexity.js";
@@ -76,6 +77,13 @@ export const DEFAULT_PHASES: readonly PipelinePhase[] = [
   // learn to honour it. It has no downstream dependents today.
   incrementalScopePhase,
   complexityPhase,
+  // `business-logic` is listed after complexity (it reuses the same callable +
+  // class definition walk) but depends only on parse + scan, so the scheduler
+  // may run it first. It tags Function/Method/Constructor/Class/Interface/
+  // Struct nodes with `likelyPlumbing` / `candidateBusiness`. Python/Java/Go
+  // only; no-op for other languages. No downstream dependents — the tags are
+  // read by MCP tools / `payload->>'$.candidateBusiness'` at query time.
+  businessLogicPhase,
   routesPhase,
   openapiPhase,
   toolsPhase,