From 33a91e28852d5d74ad9dbc49c39d7797964ae40e Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sat, 6 Jun 2026 17:31:42 +0100
Subject: [PATCH 1/8] feat(compile): add hidden --use-samples flag for
 deterministic safe-outputs replay

Adds a hidden compile mode that replaces the agentic 'Execute coding agent'
step with a deterministic driver that replays declarative `samples` entries
through the real safe-outputs MCP server. Makes end-to-end tests deterministic
without invoking any LLM.

Frontmatter:
  safe-outputs:
    create-issue:
      samples:
        - title: "..."
          body: "..."

Each entry conforms to the MCP tool inputSchema; recognized sidecar keys
(`patch` for create-pull-request and push-to-pull-request-branch) are
stripped before validation and consumed by the replay driver for branch +
patch pre-staging.

Hidden surface:
- CLI flag `--use-samples` is hidden from `gh aw compile --help`
- JSON schema description marks `samples` as 'Internal hidden feature'

Implementation:
- Static JSON Schema validation against safe_outputs_tools.json at compile time
- Deterministic step ordering (sorted by SafeOutputsConfig struct field name)
- New driver actions/setup/js/apply_samples.cjs spawns the real MCP server
  over stdio, sends one tools/call per sample, writes a synthetic
  terminal_reason: completed marker so handle_agent_failure recognizes success
- Driver pre-stages git branches + patches for create_pull_request and
  push_to_pull_request_branch samples so the real handler can derive a diff

Tests:
- 5 unit tests covering validation, sidecar stripping, deterministic ordering,
  sidecar partitioning
- 1 integration test verifying the agent step is replaced
- 2 vitest specs driving the real MCP server end-to-end
---
 actions/setup/js/apply_samples.cjs           | 354 ++++++++++
 actions/setup/js/apply_samples.test.cjs      | 115 +++
 cmd/gh-aw/main.go                            |   4 +
 pkg/cli/compile_compiler_setup.go            |   6 +
 pkg/cli/compile_config.go                    |   1 +
 pkg/parser/schemas/main_workflow_schema.json | 704 +++++++++++++++++++
 pkg/workflow/compiler_types.go               |  12 +
 pkg/workflow/compiler_validators.go          |   1 +
 pkg/workflow/compiler_yaml_ai_execution.go   |   8 +
 pkg/workflow/safe_outputs_config.go          |  56 ++
 pkg/workflow/samples_replay.go               | 103 +++
 pkg/workflow/samples_replay_test.go          |  98 +++
 pkg/workflow/samples_validation.go           | 166 +++++
 pkg/workflow/samples_validation_test.go      | 170 +++++
 pkg/workflow/workflow_builder.go             |   1 +
 15 files changed, 1799 insertions(+)
 create mode 100644 actions/setup/js/apply_samples.cjs
 create mode 100644 actions/setup/js/apply_samples.test.cjs
 create mode 100644 pkg/workflow/samples_replay.go
 create mode 100644 pkg/workflow/samples_replay_test.go
 create mode 100644 pkg/workflow/samples_validation.go
 create mode 100644 pkg/workflow/samples_validation_test.go

diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs
new file mode 100644
index 00000000000..73311b28c0e
--- /dev/null
+++ b/actions/setup/js/apply_samples.cjs
@@ -0,0 +1,354 @@
+#!/usr/bin/env node
+// @ts-check
+
+// apply_samples.cjs
+//
+// Deterministic replay driver for `gh aw compile --use-samples`.
+//
+// Reads `GH_AW_SAMPLES` (a JSON array of `{tool, arguments, sidecars}`
+// entries produced by the compiler), spawns the safe-outputs MCP server
+// (`safe_outputs_mcp_server.cjs`) as a child process, sends one JSON-RPC
+// `tools/call` per sample over stdio, and writes a synthetic `agent-stdio.log`
+// so downstream log-parsing / failure-handling steps continue to work.
+//
+// For samples whose tool is `create_pull_request` or `push_to_pull_request_branch`
+// and whose sidecars include `patch`, the driver pre-stages a branch and commits
+// the patch into the workspace BEFORE invoking the MCP tool. This lets the
+// real `create_pull_request` MCP handler (which derives a git diff against the
+// base branch) produce a meaningful transport payload.
+//
+// Env contract:
+//   GH_AW_SAMPLES                  — JSON array of replay entries (required)
+//   GH_AW_AGENT_STDIO_LOG          — path where the synthetic stdio log is written
+//   GH_AW_SAFE_OUTPUTS_CONFIG_PATH — path to the MCP server's config.json
+//   GH_AW_SAFE_OUTPUTS             — path to the MCP server's outputs.jsonl
+//   GITHUB_WORKSPACE               — git working directory for pre-staging (optional;
+//                                    falls back to cwd)
+
+const { spawn } = require("child_process");
+const fs = require("fs");
+const path = require("path");
+const os = require("os");
+
+const DEFAULT_BASE_BRANCH = process.env.GH_AW_CUSTOM_BASE_BRANCH || process.env.GITHUB_BASE_REF || process.env.GITHUB_REF_NAME || "main";
+const PATCH_SIDECAR_TOOLS = new Set(["create_pull_request", "push_to_pull_request_branch"]);
+
+/**
+ * @typedef {Object} SampleEntry
+ * @property {string} tool
+ * @property {Record<string, any>} arguments
+ * @property {Record<string, any>} [sidecars]
+ */
+
+/**
+ * Read and parse the GH_AW_SAMPLES env var. Returns [] (with a warning) when it is
+ * unset or blank; throws on invalid JSON, a non-array payload, or a malformed entry.
+ * @returns {SampleEntry[]}
+ */
+function loadSamples() {
+  const raw = process.env.GH_AW_SAMPLES;
+  if (!raw || !raw.trim()) {
+    console.error("apply_samples: GH_AW_SAMPLES is empty — no samples to replay.");
+    return [];
+  }
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (err) {
+    throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${/** @type {Error} */ (err).message}`);
+  }
+  if (!Array.isArray(parsed)) {
+    throw new Error("apply_samples: GH_AW_SAMPLES must be a JSON array");
+  }
+  for (const [i, entry] of parsed.entries()) {
+    if (!entry || typeof entry !== "object" || typeof entry.tool !== "string") {
+      throw new Error(`apply_samples: entry ${i} is missing a string "tool" field`);
+    }
+    if (!entry.arguments || typeof entry.arguments !== "object" || Array.isArray(entry.arguments)) {
+      throw new Error(`apply_samples: entry ${i} (tool=${entry.tool}) is missing an "arguments" object`);
+    }
+  }
+  return parsed;
+}
+
+/**
+ * Run a git subcommand synchronously and return stdout. Throws on non-zero exit or spawn failure.
+ * @param {string[]} args
+ * @param {string} cwd
+ * @returns {string}
+ */
+function runGit(args, cwd) {
+  const { spawnSync } = require("child_process");
+  const result = spawnSync("git", args, { cwd, encoding: "utf8" });
+  if (result.error || result.status !== 0) {
+    throw new Error(`git ${args.join(" ")} failed (exit ${result.status}): ${(result.error && result.error.message) || result.stderr || result.stdout}`);
+  }
+  return result.stdout;
+}
+
+/**
+ * Guarantee that commits succeed in CI: for each of user.email / user.name,
+ * probe the current git config and install a fallback value if the probe fails.
+ * @param {string} cwd
+ */
+function ensureGitIdentity(cwd) {
+  const fallbacks = { "user.email": "gh-aw-samples@github.com", "user.name": "gh-aw samples" };
+  // A failed `git config <key>` probe surfaces as a throw from runGit.
+  for (const [key, value] of Object.entries(fallbacks)) {
+    try {
+      runGit(["config", key], cwd);
+    } catch {
+      runGit(["config", key, value], cwd);
+    }
+  }
+}
+
+/**
+ * Pre-stage a branch + committed patch for samples whose tool reads the workspace diff. No-op
+ * without a non-blank `sidecars.patch`; otherwise sets `entry.arguments.branch` to the branch used.
+ * @param {SampleEntry} entry
+ * @param {number} index
+ * @param {string} workspace
+ */
+function preStagePatch(entry, index, workspace) {
+  const patch = entry.sidecars && entry.sidecars.patch;
+  if (typeof patch !== "string" || !patch.trim()) {
+    return;
+  }
+  const branch = typeof entry.arguments.branch === "string" && entry.arguments.branch.trim() ? entry.arguments.branch.trim() : `gh-aw-sample-${index + 1}`;
+  entry.arguments.branch = branch;
+
+  ensureGitIdentity(workspace);
+
+  // Start from the base branch so the diff is meaningful. Tolerate the case
+  // where the base ref doesn't exist locally — fall back to HEAD.
+  try {
+    runGit(["checkout", DEFAULT_BASE_BRANCH], workspace);
+  } catch (err) {
+    console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${/** @type {Error} */ (err).message}; staying on current HEAD`);
+  }
+
+  // Create the branch (or check it out if it already exists from a previous sample).
+  try {
+    runGit(["checkout", "-b", branch], workspace);
+  } catch {
+    runGit(["checkout", branch], workspace);
+  }
+
+  // Write patch to a temp file and apply it.
+  const tmpPatch = path.join(os.tmpdir(), `gh-aw-sample-${index + 1}.patch`);
+  fs.writeFileSync(tmpPatch, patch.endsWith("\n") ? patch : patch + "\n");
+  try {
+    runGit(["apply", "--whitespace=nowarn", tmpPatch], workspace);
+  } catch {
+    // Fall back to --3way for patches that don't apply cleanly on top of an
+    // empty working tree (uncommon but possible for synthetic samples).
+    runGit(["apply", "--3way", "--whitespace=nowarn", tmpPatch], workspace);
+  }
+
+  runGit(["add", "-A"], workspace);
+  runGit(["commit", "-m", `gh-aw sample ${index + 1}: ${entry.tool}`, "--allow-empty"], workspace);
+}
+
+/**
+ * Send a single JSON-RPC request to the MCP server child process and resolve with the parsed
+ * response whose `id` matches, skipping notifications; rejects if stdout closes first (no timeout).
+ * @param {import("child_process").ChildProcessWithoutNullStreams} child - unused; kept for call-site compatibility
+ * @param {NodeJS.WritableStream} stdin
+ * @param {Record<string, any>} request
+ * @param {AsyncIterableIterator<string>} responseIterator
+ * @returns {Promise<any>}
+ */
+async function sendJsonRpc(child, stdin, request, responseIterator) {
+  stdin.write(JSON.stringify(request) + "\n");
+  for (let next = await responseIterator.next(); !next.done; next = await responseIterator.next()) {
+    const message = JSON.parse(next.value);
+    if (message && message.id === request.id) return message;
+  }
+  throw new Error(`apply_samples: MCP server closed stdout before responding to request id=${request.id}`);
+}
+
+/**
+ * Turn the MCP server's stdout into an async iterator of trimmed, non-empty lines.
+ * Chunks go through a streaming UTF-8 decoder so multi-byte characters split across chunks survive.
+ * @param {NodeJS.ReadableStream} stdout
+ */
+async function* lineIterator(stdout) {
+  const decoder = new TextDecoder("utf-8");
+  let buffer = "";
+  for await (const chunk of stdout) {
+    buffer += typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
+    const lines = buffer.split("\n");
+    buffer = lines.pop() || "";
+    for (const line of lines) {
+      if (line.trim()) yield line.trim();
+    }
+  }
+  buffer += decoder.decode();
+  if (buffer.trim()) {
+    yield buffer.trim();
+  }
+}
+
+/**
+ * Find safe_outputs_mcp_server.cjs on disk. Looks next to this driver first
+ * (local execution / tests), then, when RUNNER_TEMP is set, in the gh-aw/actions
+ * and gh-aw/safeoutputs directories beneath it. Throws if no candidate exists.
+ * @returns {string}
+ */
+function resolveMcpServerPath() {
+  const scriptName = "safe_outputs_mcp_server.cjs";
+  const runnerTemp = process.env.RUNNER_TEMP;
+  const candidates = [path.join(__dirname, scriptName)];
+  if (runnerTemp) {
+    candidates.push(path.join(runnerTemp, "gh-aw", "actions", scriptName), path.join(runnerTemp, "gh-aw", "safeoutputs", scriptName));
+  }
+  const found = candidates.find(candidate => fs.existsSync(candidate));
+  if (found === undefined) {
+    throw new Error(`apply_samples: could not locate safe_outputs_mcp_server.cjs. Looked in: ${candidates.join(", ")}`);
+  }
+  return found;
+}
+
+/**
+ * Record the replay as a successful agent run by appending a synthetic
+ * `terminal_reason: completed` result line to the engine stdio log, for the
+ * benefit of downstream parsers / handle_agent_failure. No-op without a path.
+ * @param {string} logPath
+ * @param {number} sampleCount
+ */
+function writeSyntheticStdioLog(logPath, sampleCount) {
+  if (!logPath) {
+    return;
+  }
+  try {
+    fs.mkdirSync(path.dirname(logPath), { recursive: true });
+  } catch {
+    /* ignore */
+  }
+  const summary = `gh-aw samples replay: ${sampleCount} MCP tools/call invocation(s) completed deterministically.`;
+  const marker = JSON.stringify({
+    type: "result",
+    subtype: "success",
+    terminal_reason: "completed",
+    num_turns: sampleCount,
+    driver: "apply_samples",
+  });
+  // Human-readable summary line first, then the machine-readable marker, each newline-terminated.
+  fs.appendFileSync(logPath, `${summary}\n${marker}\n`);
+}
+
+/**
+ * Entry point: pre-stage patch sidecars, replay every sample as one MCP `tools/call` over stdio, write the synthetic stdio log, and throw if any sample failed.
+ */
+async function main() {
+  const samples = loadSamples();
+  const workspace = process.env.GITHUB_WORKSPACE || process.cwd();
+  const logPath = process.env.GH_AW_AGENT_STDIO_LOG || "";
+
+  if (samples.length === 0) {
+    console.error("apply_samples: nothing to replay; exiting cleanly.");
+    writeSyntheticStdioLog(logPath, 0);
+    return;
+  }
+
+  // Pre-stage branches/patches before the MCP server is spawned.
+  for (const [i, sample] of samples.entries()) {
+    if (PATCH_SIDECAR_TOOLS.has(sample.tool)) preStagePatch(sample, i, workspace);
+  }
+
+  const serverPath = resolveMcpServerPath();
+  console.error(`apply_samples: spawning MCP server ${serverPath}`);
+  const child = spawn(process.execPath, [serverPath], {
+    stdio: ["pipe", "pipe", "inherit"],
+    env: process.env,
+  });
+
+  const stdoutIter = lineIterator(child.stdout);
+  let nextId = 1;
+  const failures = [];
+
+  try {
+    // Initialize handshake.
+    const initRsp = await sendJsonRpc(
+      child,
+      child.stdin,
+      {
+        jsonrpc: "2.0",
+        id: nextId++,
+        method: "initialize",
+        params: {
+          protocolVersion: "2025-06-18",
+          capabilities: {},
+          clientInfo: { name: "apply_samples", version: "1.0.0" },
+        },
+      },
+      stdoutIter
+    );
+    if (initRsp.error) {
+      throw new Error(`MCP initialize failed: ${JSON.stringify(initRsp.error)}`);
+    }
+
+    // Send one tools/call per sample; failures are collected so every sample is still attempted.
+    for (const [i, sample] of samples.entries()) {
+      const callRsp = await sendJsonRpc(
+        child,
+        child.stdin,
+        {
+          jsonrpc: "2.0",
+          id: nextId++,
+          method: "tools/call",
+          params: { name: sample.tool, arguments: sample.arguments },
+        },
+        stdoutIter
+      );
+      const result = callRsp.result;
+      if (callRsp.error) {
+        failures.push(`sample[${i}] (tool=${sample.tool}): ${JSON.stringify(callRsp.error)}`);
+      } else if (result && result.isError) {
+        const text = result.content && result.content[0] && result.content[0].text;
+        failures.push(`sample[${i}] (tool=${sample.tool}): ${text || JSON.stringify(result)}`);
+      } else {
+        console.error(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`);
+      }
+    }
+  } finally {
+    try {
+      child.stdin.end();
+    } catch {
+      /* ignore */
+    }
+    // Give the server up to 2s to exit cleanly (no wait if it has already exited), then SIGTERM it.
+    await new Promise(resolve => {
+      if (child.exitCode !== null || child.signalCode !== null) return resolve(undefined);
+      const timer = setTimeout(() => {
+        try {
+          child.kill("SIGTERM");
+        } catch {
+          /* ignore */
+        }
+        resolve(undefined);
+      }, 2000);
+      child.once("exit", () => {
+        clearTimeout(timer);
+        resolve(undefined);
+      });
+    });
+  }
+
+  writeSyntheticStdioLog(logPath, samples.length);
+
+  if (failures.length > 0) {
+    throw new Error(`apply_samples: ${failures.length} sample(s) failed:\n  - ${failures.join("\n  - ")}`);
+  }
+  console.error(`apply_samples: ${samples.length} sample(s) replayed successfully.`);
+}
+
+if (require.main === module) {
+  main().catch(err => {
+    console.error(err && err.stack ? err.stack : String(err));
+    process.exit(1);
+  });
+}
+
+module.exports = { main, loadSamples, preStagePatch, resolveMcpServerPath };
diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs
new file mode 100644
index 00000000000..a2210c54761
--- /dev/null
+++ b/actions/setup/js/apply_samples.test.cjs
@@ -0,0 +1,115 @@
+// @ts-check
+//
+// apply_samples.test.cjs
+//
+// Smoke test for the deterministic samples replay driver. Spawns the
+// driver as a subprocess (so it actually launches the real MCP server) and
+// asserts that:
+//   - the driver exits 0
+//   - the MCP server appends the expected JSONL entry to GH_AW_SAFE_OUTPUTS
+//   - the synthetic agent-stdio log includes a `terminal_reason: completed` marker
+//
+// Tests intentionally use the simplest safe-output tool (`create_issue`) so we
+// do not need to set up a git working tree for patch sidecars.
+
+import { describe, it, expect, beforeAll } from "vitest";
+import { spawnSync } from "child_process";
+import fs from "fs";
+import path from "path";
+import os from "os";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+const driverPath = path.join(__dirname, "apply_samples.cjs");
+
+function makeTempDir(prefix) {
+  return fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+}
+
+describe.sequential("apply_samples.cjs", () => {
+  let tempDir;
+  let configPath;
+  let outputsPath;
+  let logPath;
+
+  beforeAll(() => {
+    tempDir = makeTempDir("gh-aw-apply-samples-");
+    configPath = path.join(tempDir, "config.json");
+    outputsPath = path.join(tempDir, "outputs.jsonl");
+    logPath = path.join(tempDir, "agent-stdio.log");
+
+    // Minimal safe-outputs config enabling only the `create_issue` tool. The
+    // bootstrap loader keys off the snake-case keys present here.
+    fs.writeFileSync(
+      configPath,
+      JSON.stringify({
+        create_issue: { max: 1 },
+      })
+    );
+  });
+
+  it("replays a create_issue sample through the real MCP server and emits a completed marker", () => {
+    const samples = [
+      {
+        tool: "create_issue",
+        arguments: {
+          title: "Deterministic sample issue",
+          body: "This issue was emitted by the apply_samples driver during a unit test.",
+        },
+      },
+    ];
+
+    const result = spawnSync(process.execPath, [driverPath], {
+      env: {
+        ...process.env,
+        GH_AW_SAMPLES: JSON.stringify(samples),
+        GH_AW_SAFE_OUTPUTS_CONFIG_PATH: configPath,
+        GH_AW_SAFE_OUTPUTS: outputsPath,
+        GH_AW_AGENT_STDIO_LOG: logPath,
+      },
+      encoding: "utf8",
+      timeout: 15000,
+    });
+
+    if (result.status !== 0) {
+      // Surface stderr so failures are diagnosable in CI.
+      throw new Error(`driver exited with status ${result.status}\nstderr:\n${result.stderr}\nstdout:\n${result.stdout}`);
+    }
+
+    expect(fs.existsSync(outputsPath)).toBe(true);
+    const outputLines = fs
+      .readFileSync(outputsPath, "utf8")
+      .split("\n")
+      .filter(line => line.trim().length > 0);
+    expect(outputLines.length).toBeGreaterThanOrEqual(1);
+
+    const firstEntry = JSON.parse(outputLines[0]);
+    expect(firstEntry.type).toBe("create_issue");
+    expect(firstEntry.title).toBe("Deterministic sample issue");
+
+    expect(fs.existsSync(logPath)).toBe(true);
+    const logText = fs.readFileSync(logPath, "utf8");
+    expect(logText).toContain("terminal_reason");
+    expect(logText).toContain("completed");
+  });
+
+  it("exits cleanly when GH_AW_SAMPLES is empty", () => {
+    const result = spawnSync(process.execPath, [driverPath], {
+      env: {
+        ...process.env,
+        GH_AW_SAMPLES: "[]",
+        GH_AW_SAFE_OUTPUTS_CONFIG_PATH: configPath,
+        GH_AW_SAFE_OUTPUTS: outputsPath,
+        GH_AW_AGENT_STDIO_LOG: path.join(tempDir, "empty-log.log"),
+      },
+      encoding: "utf8",
+      timeout: 10000,
+    });
+
+    expect(result.status).toBe(0);
+    const logText = fs.readFileSync(path.join(tempDir, "empty-log.log"), "utf8");
+    expect(logText).toContain("terminal_reason");
+  });
+});
diff --git a/cmd/gh-aw/main.go b/cmd/gh-aw/main.go
index 895fdb2beed..27748d7cee5 100644
--- a/cmd/gh-aw/main.go
+++ b/cmd/gh-aw/main.go
@@ -304,6 +304,7 @@ Examples:
 		priorManifestFile, _ := cmd.Flags().GetString("prior-manifest-file")
 		ghes, _ := cmd.Flags().GetBool("ghes")
 		verbose, _ := cmd.Flags().GetBool("verbose")
+		useSamples, _ := cmd.Flags().GetBool("use-samples")
 		if err := validateEngine(engineOverride); err != nil {
 			return err
 		}
@@ -364,6 +365,7 @@ Examples:
 			ValidateImages:         validateImages,
 			PriorManifestFile:      priorManifestFile,
 			GHESCompat:             ghes,
+			UseSamples:             useSamples,
 		}
 		if _, err := cli.CompileWorkflows(cmd.Context(), config); err != nil {
 			// Return error as-is without additional formatting
@@ -703,6 +705,8 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all
 	compileCmd.Flags().Bool("strict", false, "Override frontmatter to enforce strict mode validation for all workflows (enforces action pinning, network config, safe-outputs, refuses write permissions and deprecated fields). Note: Workflows default to strict mode unless frontmatter sets strict: false")
 	compileCmd.Flags().Bool("trial", false, "Enable trial mode compilation (modifies workflows for trial execution)")
 	compileCmd.Flags().String("logical-repo", "", "Repository to simulate workflow execution against (for trial mode)")
+	compileCmd.Flags().Bool("use-samples", false, "Hidden: replace the agentic 'Execute coding agent' step with a deterministic driver that replays the workflow's safe-outputs `samples` frontmatter entries through the safe-outputs MCP server. Used to make end-to-end tests deterministic.")
+	_ = compileCmd.Flags().MarkHidden("use-samples")
 	compileCmd.Flags().Bool("dependabot", false, "Generate dependency manifests (package.json, requirements.txt, go.mod) and Dependabot config when dependencies are detected")
 	compileCmd.Flags().Bool("force", false, "Force overwrite of existing dependency files (e.g., dependabot.yml)")
 	compileCmd.Flags().Bool("refresh-stop-time", false, "Force regeneration of stop-after times instead of preserving existing values from lock files")
diff --git a/pkg/cli/compile_compiler_setup.go b/pkg/cli/compile_compiler_setup.go
index bdc672aea13..f5853e752ec 100644
--- a/pkg/cli/compile_compiler_setup.go
+++ b/pkg/cli/compile_compiler_setup.go
@@ -147,6 +147,12 @@ func configureCompilerFlags(compiler *workflow.Compiler, config CompileConfig) {
 		}
 	}
 
+	// Replace the agentic step with a deterministic samples replay driver when requested (hidden feature).
+	if config.UseSamples {
+		compileCompilerSetupLog.Print("Enabling --use-samples: agentic step will be replaced by a deterministic replay driver")
+		compiler.SetUseSamples(true)
+	}
+
 	// Set refresh stop time flag
 	compiler.SetRefreshStopTime(config.RefreshStopTime)
 	if config.RefreshStopTime {
diff --git a/pkg/cli/compile_config.go b/pkg/cli/compile_config.go
index 24206c2d583..901c3ccb27e 100644
--- a/pkg/cli/compile_config.go
+++ b/pkg/cli/compile_config.go
@@ -13,6 +13,7 @@ type CompileConfig struct {
 	Purge                  bool     // Remove orphaned lock files
 	TrialMode              bool     // Enable trial mode (suppress safe outputs)
 	TrialLogicalRepoSlug   string   // Target repository for trial mode
+	UseSamples             bool     // Hidden: replace agentic step with a deterministic samples replay driver
 	Strict                 bool     // Enable strict mode validation
 	Dependabot             bool     // Generate Dependabot manifests for npm dependencies
 	ForceOverwrite         bool     // Force overwrite of existing files (dependabot.yml)
diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json
index 27e69031485..9be825d9a2a 100644
--- a/pkg/parser/schemas/main_workflow_schema.json
+++ b/pkg/parser/schemas/main_workflow_schema.json
@@ -4394,6 +4394,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -4984,6 +5000,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -5062,6 +5094,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -5117,6 +5165,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -5258,6 +5322,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -5378,6 +5458,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -5428,6 +5524,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -5558,6 +5670,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -5657,6 +5785,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -5742,6 +5886,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "github-token": {
                   "$ref": "#/$defs/github_token",
                   "description": "GitHub token to use for this specific output type. Overrides global github-token if specified."
@@ -5814,6 +5974,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "state-reason": {
                   "type": "string",
                   "enum": ["completed", "not_planned", "duplicate"],
@@ -5900,6 +6076,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -5981,6 +6173,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -6097,6 +6305,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false,
@@ -6472,6 +6696,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "allow-workflows": {
                   "type": "boolean",
                   "description": "When true, adds workflows: write to the GitHub App token permissions. Required when allowed-files targets .github/workflows/ paths. Requires safe-outputs.github-app to be configured because the workflows permission is a GitHub App-only permission and cannot be granted via GITHUB_TOKEN.",
@@ -6551,6 +6791,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -6644,6 +6900,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -6715,6 +6987,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -6781,6 +7069,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -6845,6 +7149,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -6884,6 +7204,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -6928,6 +7264,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "github-app": {
                   "$ref": "#/$defs/github_app",
                   "description": "GitHub App credentials for minting an installation access token scoped to checks:write for this handler. When set, a short-lived token is minted before the handler runs and revoked afterwards."
@@ -7037,6 +7389,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -7122,6 +7490,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -7231,6 +7615,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -7305,6 +7705,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -7412,6 +7828,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -7486,6 +7918,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -7565,6 +8013,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional declarative sample payloads that exercise this safe-output handler, given as a list of objects (this schema also accepts a single object in place of the list). Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -7656,6 +8120,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -7730,6 +8210,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -7818,6 +8314,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -8016,6 +8528,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "github-token-for-extra-empty-commit": {
                   "type": "string",
                   "description": "Token used to push an empty commit after pushing changes to trigger CI events. Works around the GITHUB_TOKEN limitation where pushes don't trigger workflow runs. Defaults to the magic secret GH_AW_CI_TRIGGER_TOKEN if set in the repository. Use a secret expression (e.g. '${{ secrets.CI_TOKEN }}') for a custom token, or 'app' for GitHub App auth."
@@ -8201,6 +8729,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -8275,6 +8819,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -8347,6 +8907,22 @@
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
                 },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
+                },
                 "required-labels": {
                   "type": "array",
                   "items": {
@@ -8412,6 +8988,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "required": ["workflows"],
@@ -8568,6 +9160,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "required": ["workflows"],
@@ -8632,6 +9240,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -8694,6 +9318,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -8744,6 +9384,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -8809,6 +9465,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -8961,6 +9633,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
@@ -9654,6 +10342,22 @@
                   "type": "boolean",
                   "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)",
                   "examples": [true, false]
+                },
+                "samples": {
+                  "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.",
+                  "oneOf": [
+                    {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "additionalProperties": true
+                      }
+                    },
+                    {
+                      "type": "object",
+                      "additionalProperties": true
+                    }
+                  ]
                 }
               },
               "additionalProperties": false
diff --git a/pkg/workflow/compiler_types.go b/pkg/workflow/compiler_types.go
index 9c0639c4977..4ea0310b547 100644
--- a/pkg/workflow/compiler_types.go
+++ b/pkg/workflow/compiler_types.go
@@ -70,6 +70,7 @@ type Compiler struct {
 	forceStaged             bool                     // If true, force all safe-outputs into staged mode
 	trialMode               bool                     // If true, suppress safe outputs for trial mode execution
 	trialLogicalRepoSlug    string                   // If set in trial mode, the logical repository to checkout
+	useSamples              bool                     // If true, replace the agentic step with a deterministic samples replay driver (hidden feature)
 	refreshStopTime         bool                     // If true, regenerate stop-after times instead of preserving existing ones
 	forceRefreshActionPins  bool                     // If true, clear action cache and resolve all actions from GitHub API
 	failFast                bool                     // If true, stop at first validation error instead of collecting all errors
@@ -203,6 +204,14 @@ func (c *Compiler) SetTrialLogicalRepoSlug(repo string) {
 	c.trialLogicalRepoSlug = repo
 }
 
+// SetUseSamples records whether compilation should swap the agentic step for
+// the deterministic replay driver, which feeds `samples` entries to the
+// safe-outputs MCP server as real `tools/call` JSON-RPC requests. Hidden
+// feature backing `gh aw compile --use-samples`; it only stores the flag.
+func (c *Compiler) SetUseSamples(use bool) {
+	c.useSamples = use
+}
+
 // SetStrictMode configures whether to enable strict validation mode
 func (c *Compiler) SetStrictMode(strict bool) {
 	c.strictMode = strict
@@ -446,6 +455,7 @@ type WorkflowData struct {
 	WorkflowID                     string         // workflow identifier derived from markdown filename (basename without extension)
 	TrialMode                      bool           // whether the workflow is running in trial mode
 	TrialLogicalRepo               string         // target repository slug for trial mode (owner/repo)
+	UseSamples                     bool           // whether the agentic step should be replaced by a deterministic samples replay driver (hidden feature)
 	FrontmatterName                string         // name field from frontmatter (for code scanning alert driver default)
 	FrontmatterEmoji               string         // emoji field from frontmatter (for display in footers and UI)
 	FrontmatterYAML                string         // raw frontmatter YAML content (rendered as comment in lock file for reference)
@@ -627,6 +637,8 @@ type BaseSafeOutputConfig struct {
 	GitHubApp                *GitHubAppConfig `yaml:"github-app,omitempty"`                 // GitHub App credentials for minting a per-handler installation access token
 	Staged                   bool             `yaml:"staged,omitempty"`                     // If true, emit step summary messages instead of making GitHub API calls for this specific output type
 	NormalizeClosingKeywords *bool            `yaml:"normalize-closing-keywords,omitempty"` // When true for this output type, strip backticks from recognized issue-closing keywords in body fields.
+	// Samples carries deterministic replay samples for the hidden `gh aw compile --use-samples` flag. Each entry is the JSON object passed to the corresponding MCP tool's `tools/call` arguments. Sample-only sidecar fields (e.g. `patch` for create_pull_request) are stripped before the call and used by the replay driver.
+	Samples []map[string]any `yaml:"samples,omitempty"`
 }
 
 // SafeOutputsConfig holds configuration for automatic output routes
diff --git a/pkg/workflow/compiler_validators.go b/pkg/workflow/compiler_validators.go
index 316ba6a8b27..d1286dcd4bb 100644
--- a/pkg/workflow/compiler_validators.go
+++ b/pkg/workflow/compiler_validators.go
@@ -152,6 +152,7 @@ func (c *Compiler) validateCoreToolConfiguration(workflowData *WorkflowData, mar
 		{logMessage: "Validating sandbox configuration", validateFn: func() error { return validateSandboxConfig(workflowData) }},
 		{logMessage: "Validating safe-outputs target fields", validateFn: func() error { return validateSafeOutputsTarget(workflowData.SafeOutputs) }},
 		{logMessage: "Validating safe-outputs max fields", validateFn: func() error { return validateSafeOutputsMax(workflowData.SafeOutputs) }},
+		{logMessage: "Validating safe-outputs samples entries against MCP tool schemas", validateFn: func() error { return validateSafeOutputsSamples(workflowData.SafeOutputs) }},
 		{logMessage: "Validating safe-outputs allowed-domains", validateFn: func() error { return c.validateSafeOutputsAllowedDomains(workflowData.SafeOutputs) }},
 		{logMessage: "Validating safe-outputs merge-pull-request", validateFn: func() error { return validateSafeOutputsMergePullRequest(workflowData.SafeOutputs) }},
 		{logMessage: "Validating safe-outputs needs declarations", validateFn: func() error { return validateSafeOutputsNeeds(workflowData) }},
diff --git a/pkg/workflow/compiler_yaml_ai_execution.go b/pkg/workflow/compiler_yaml_ai_execution.go
index b3b6826b74a..48473d9c883 100644
--- a/pkg/workflow/compiler_yaml_ai_execution.go
+++ b/pkg/workflow/compiler_yaml_ai_execution.go
@@ -9,6 +9,14 @@ import (
 
 // generateEngineExecutionSteps generates the GitHub Actions steps for executing the AI engine
 func (c *Compiler) generateEngineExecutionSteps(yaml *strings.Builder, data *WorkflowData, engine CodingAgentEngine, logFile string) {
+	// --use-samples (hidden) replaces the agent step with a deterministic driver
+	// that replays the workflow's safe-outputs `samples` frontmatter entries
+	// through the safe-outputs MCP server. The engine is never invoked.
+	if data.UseSamples {
+		compilerYamlLog.Printf("Replacing engine execution with samples replay driver: engine=%s", engine.GetID())
+		c.generateSamplesReplayStep(yaml, data, logFile)
+		return
+	}
 
 	steps := engine.GetExecutionSteps(data, logFile)
 	compilerYamlLog.Printf("Generating engine execution steps: engine=%s, steps=%d", engine.GetID(), len(steps))
diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go
index 58dc548537a..e3ad6fb024e 100644
--- a/pkg/workflow/safe_outputs_config.go
+++ b/pkg/workflow/safe_outputs_config.go
@@ -9,6 +9,7 @@ import (
 	"github.com/github/gh-aw/pkg/logger"
 	"github.com/github/gh-aw/pkg/sliceutil"
 	"github.com/github/gh-aw/pkg/typeutil"
+	"go.yaml.in/yaml/v3"
 )
 
 var safeOutputsConfigLog = logger.New("workflow:safe_outputs_config")
@@ -755,6 +756,61 @@ func (c *Compiler) parseBaseSafeOutputConfig(configMap map[string]any, config *B
 			config.Staged = stagedBool
 		}
 	}
+
+	// Parse samples list (hidden feature: deterministic replay samples for --use-samples).
+	// Accepts either a YAML list of objects, or a single object that is auto-wrapped
+	// into a one-element list, or a YAML string scalar containing a list (for
+	// authoring convenience with `|` block scalars in frontmatter).
+	if samples, exists := configMap["samples"]; exists {
+		parsed := parseSamplesValue(samples)
+		if len(parsed) > 0 {
+			safeOutputsConfigLog.Printf("Parsed %d samples entries", len(parsed))
+			config.Samples = parsed
+		}
+	}
+}
+
+// parseSamplesValue normalizes a `samples` frontmatter value into a list of
+// objects. Accepted shapes: a YAML list of mappings (non-mapping items are
+// skipped), a single mapping (wrapped into a one-element list), or a string
+// holding either of those as YAML (e.g. a `|` block scalar), which is parsed
+// and re-normalized. Anything else yields nil, including a string whose YAML
+// content is just another string.
+func parseSamplesValue(samples any) []map[string]any {
+	switch v := samples.(type) {
+	case []any:
+		out := make([]map[string]any, 0, len(v))
+		for _, item := range v {
+			if m, ok := item.(map[string]any); ok {
+				out = append(out, m)
+			} else if mStr, ok := item.(map[string]string); ok {
+				converted := make(map[string]any, len(mStr))
+				for k, s := range mStr {
+					converted[k] = s
+				}
+				out = append(out, converted)
+			}
+		}
+		return out
+	case map[string]any:
+		return []map[string]any{v}
+	case string:
+		trimmed := strings.TrimSpace(v)
+		if trimmed == "" {
+			return nil
+		}
+		var nested any
+		if err := yaml.Unmarshal([]byte(trimmed), &nested); err != nil {
+			safeOutputsConfigLog.Printf("Failed to parse samples string as YAML: %v", err)
+			return nil
+		}
+		if _, isScalar := nested.(string); isScalar {
+			return nil // a plain scalar decodes to itself; recursing would never terminate
+		}
+		return parseSamplesValue(nested)
+	default:
+		return nil
+	}
 }
 
 // SafeOutputStepConfig holds configuration for building a single safe output step
diff --git a/pkg/workflow/samples_replay.go b/pkg/workflow/samples_replay.go
new file mode 100644
index 00000000000..c0f49d804a8
--- /dev/null
+++ b/pkg/workflow/samples_replay.go
@@ -0,0 +1,103 @@
+package workflow
+
+import (
+	"encoding/json"
+	"fmt"
+	"sort"
+	"strings"
+)
+
+// SampleEntry is one element of the JSON array handed to apply_samples.cjs
+// (via the GH_AW_SAMPLES env var); each maps to a single MCP `tools/call`.
+type SampleEntry struct {
+	// Tool is the snake_case MCP tool name (e.g. "create_pull_request").
+	Tool string `json:"tool"`
+	// Arguments are the MCP `tools/call` arguments, i.e. the sample with its
+	// sidecar fields (e.g. `patch`) already removed. Always serialized.
+	Arguments map[string]any `json:"arguments"`
+	// Sidecars holds the fields removed from Arguments that the driver must
+	// pre-stage out of band (e.g. `patch`). Omitted from the JSON when empty.
+	Sidecars map[string]any `json:"sidecars,omitempty"`
+}
+
+// collectSampleEntries flattens the `samples` of every safe-output handler in
+// config into the sequence of calls the replay driver will make. Handlers are
+// visited in sorted struct-field-name order and samples keep their declared
+// order within a handler, so the compiled YAML is stable across runs.
+func collectSampleEntries(config *SafeOutputsConfig) []SampleEntry {
+	if config == nil {
+		return nil
+	}
+
+	// Go randomizes map iteration order, so sort the field names up front.
+	orderedFields := make([]string, 0, len(safeOutputFieldMapping))
+	for name := range safeOutputFieldMapping {
+		orderedFields = append(orderedFields, name)
+	}
+	sort.Strings(orderedFields)
+
+	var collected []SampleEntry
+	for _, field := range orderedFields {
+		handler := extractBaseSafeOutputConfig(config, field)
+		if handler == nil || len(handler.Samples) == 0 {
+			continue
+		}
+		tool := safeOutputFieldMapping[field]
+		isSidecar := sampleSidecarFields[tool]
+		for _, raw := range handler.Samples {
+			entry := SampleEntry{
+				Tool:      tool,
+				Arguments: make(map[string]any, len(raw)),
+			}
+			for key, value := range raw {
+				if !isSidecar[key] {
+					entry.Arguments[key] = value
+					continue
+				}
+				// Allocate lazily so entries without sidecars keep a nil map.
+				if entry.Sidecars == nil {
+					entry.Sidecars = make(map[string]any)
+				}
+				entry.Sidecars[key] = value
+			}
+			collected = append(collected, entry)
+		}
+	}
+	return collected
+}
+
+// generateSamplesReplayStep emits the YAML step that stands in for the agentic
+// `Execute coding agent` step under the hidden `gh aw compile --use-samples`
+// flag. The step hands the collected samples to apply_samples.cjs as a JSON
+// array in GH_AW_SAMPLES; the driver replays each one as a `tools/call` against
+// the safe-outputs MCP server. logFile is exported as GH_AW_AGENT_STDIO_LOG.
+func (c *Compiler) generateSamplesReplayStep(yaml *strings.Builder, data *WorkflowData, logFile string) {
+	entries := collectSampleEntries(data.SafeOutputs)
+	compilerYamlLog.Printf("Generating samples replay step: entries=%d", len(entries))
+	if entries == nil {
+		// A nil slice marshals to `null`; the driver is promised a JSON array.
+		entries = []SampleEntry{}
+	}
+
+	payload, err := json.Marshal(entries)
+	if err != nil {
+		// Should never happen for map[string]any payloads; degrade to an empty array.
+		compilerYamlLog.Printf("Warning: failed to marshal samples entries: %v", err)
+		payload = []byte("[]")
+	}
+
+	yaml.WriteString("      - name: Replay safe-outputs samples (deterministic)\n")
+	yaml.WriteString("        id: agentic_execution\n")
+	yaml.WriteString("        env:\n")
+	yaml.WriteString("          GH_AW_SAMPLES: |\n")
+	for _, line := range strings.Split(string(payload), "\n") {
+		fmt.Fprintf(yaml, "            %s\n", line)
+	}
+	fmt.Fprintf(yaml, "          GH_AW_AGENT_STDIO_LOG: %s\n", logFile)
+	yaml.WriteString("          GH_AW_SAFE_OUTPUTS_CONFIG_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/config.json\n")
+	yaml.WriteString("          GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl\n")
+	yaml.WriteString("        run: |\n")
+	yaml.WriteString("          set -euo pipefail\n")
+	yaml.WriteString("          mkdir -p \"$(dirname \"$GH_AW_AGENT_STDIO_LOG\")\"\n")
+	yaml.WriteString("          node \"${{ runner.temp }}/gh-aw/actions/apply_samples.cjs\"\n")
+}
diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go
new file mode 100644
index 00000000000..37096b20b00
--- /dev/null
+++ b/pkg/workflow/samples_replay_test.go
@@ -0,0 +1,98 @@
+//go:build integration
+
+package workflow
+
+import (
+	"os"
+	"strings"
+	"testing"
+)
+
+// TestUseSamplesReplacesAgentStep compiles one workflow twice: in default mode
+// the lock file must not mention the replay step or apply_samples.cjs; with
+// SetUseSamples(true) it must contain the deterministic `Replay safe-outputs
+// samples` step driven by apply_samples.cjs.
+func TestUseSamplesReplacesAgentStep(t *testing.T) {
+	const md = `---
+on:
+  workflow_dispatch:
+permissions: read-all
+engine:
+  id: claude
+safe-outputs:
+  create-issue:
+    samples:
+      - title: "Deterministic test issue"
+        body: "Issue body emitted by gh-aw samples replay."
+---
+
+Trivial workflow whose only job is to be compiled with --use-samples.
+`
+
+	tmpFile, err := os.CreateTemp("", "use-samples-*.md")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(tmpFile.Name())
+	if _, err := tmpFile.WriteString(md); err != nil {
+		t.Fatal(err)
+	}
+	tmpFile.Close() // the compiler is handed the path, not this handle
+
+	t.Run("Default Mode", func(t *testing.T) {
+		compiler := NewCompiler()
+		if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil {
+			t.Fatalf("compile failed: %v", err)
+		}
+		lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml" // lock file is read from next to the source file
+		defer os.Remove(lockPath)
+		b, err := os.ReadFile(lockPath)
+		if err != nil {
+			t.Fatalf("read lock: %v", err)
+		}
+		lockContent := string(b)
+		if strings.Contains(lockContent, "Replay safe-outputs samples") {
+			t.Error("Did not expect samples replay step in default mode")
+		}
+		if strings.Contains(lockContent, "apply_samples.cjs") {
+			t.Error("Did not expect apply_samples driver in default mode")
+		}
+	})
+
+	t.Run("Use Samples Mode", func(t *testing.T) {
+		compiler := NewCompiler()
+		compiler.SetUseSamples(true)
+		if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil {
+			t.Fatalf("compile failed: %v", err)
+		}
+		workflowData, err := compiler.ParseWorkflowFile(tmpFile.Name()) // parsed separately: CompileWorkflow only returns an error
+		if err != nil {
+			t.Fatalf("ParseWorkflowFile failed: %v", err)
+		}
+		if !workflowData.UseSamples {
+			t.Fatal("Expected workflowData.UseSamples to be true after SetUseSamples(true)")
+		}
+		lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml"
+		defer os.Remove(lockPath) // NOTE(review): registered after the Fatal paths above, so those leave the lock file behind
+		b, _ := os.ReadFile(lockPath) // NOTE(review): read error is dropped here, unlike Default Mode; a missing lock file only shows up as the Contains failures below
+		lockContent := string(b)
+		if !strings.Contains(lockContent, "Replay safe-outputs samples (deterministic)") {
+			t.Error("Expected `Replay safe-outputs samples (deterministic)` step in lock file")
+		}
+		if !strings.Contains(lockContent, "apply_samples.cjs") {
+			t.Error("Expected lock file to invoke apply_samples.cjs driver")
+		}
+		if !strings.Contains(lockContent, "GH_AW_SAMPLES:") {
+			t.Error("Expected GH_AW_SAMPLES env var in lock file")
+		}
+		if !strings.Contains(lockContent, `"tool":"create_issue"`) { // compact JSON: no space after the colon
+			t.Error("Expected JSON-encoded create_issue tool entry in lock file")
+		}
+		if !strings.Contains(lockContent, "Deterministic test issue") {
+			t.Error("Expected sample title in lock file")
+		}
+		if !strings.Contains(lockContent, "id: agentic_execution") {
+			t.Error("Expected id: agentic_execution on the replay step")
+		}
+	})
+}
diff --git a/pkg/workflow/samples_validation.go b/pkg/workflow/samples_validation.go
new file mode 100644
index 00000000000..79bfbc0f5f0
--- /dev/null
+++ b/pkg/workflow/samples_validation.go
@@ -0,0 +1,166 @@
+package workflow
+
+import (
+	"encoding/json"
+	"fmt"
+	"reflect"
+	"sort"
+	"strings"
+	"sync"
+
+	"github.com/santhosh-tekuri/jsonschema/v6"
+)
+
+// sampleSidecarFields lists fields recognized inside a `samples` entry
+// that are NOT passed to the MCP tool's `tools/call` arguments. They are stripped
+// from the sample before schema validation and consumed by the replay driver
+// (e.g. to pre-stage a branch + patch on disk).
+var sampleSidecarFields = map[string]map[string]bool{ // MCP tool name -> set of sidecar field names
+	"create_pull_request": {
+		"patch": true,
+	},
+	"push_to_pull_request_branch": {
+		"patch": true,
+	},
+}
+
+// compiledToolSchemas caches the per-tool jsonschema.Schema parsed from the
+// embedded safe_outputs_tools.json. Built lazily on first use; a failure is cached too.
+var (
+	compiledToolSchemasOnce sync.Once
+	compiledToolSchemas     map[string]*jsonschema.Schema
+	compiledToolSchemasErr  error
+)
+
+func getCompiledToolSchemas() (map[string]*jsonschema.Schema, error) {
+	compiledToolSchemasOnce.Do(func() { // runs at most once; later calls return the stored map or the stored error
+		var tools []struct {
+			Name        string          `json:"name"`
+			InputSchema json.RawMessage `json:"inputSchema"`
+		}
+		if err := json.Unmarshal([]byte(safeOutputsToolsJSONContent), &tools); err != nil {
+			compiledToolSchemasErr = fmt.Errorf("failed to parse safe_outputs_tools.json for samples validation: %w", err)
+			return
+		}
+		out := make(map[string]*jsonschema.Schema, len(tools))
+		for _, t := range tools {
+			if len(t.InputSchema) == 0 {
+				continue // tool declares no inputSchema: it gets no entry in the map
+			}
+			var schemaDoc any
+			if err := json.Unmarshal(t.InputSchema, &schemaDoc); err != nil {
+				compiledToolSchemasErr = fmt.Errorf("failed to parse inputSchema for tool %q: %w", t.Name, err)
+				return
+			}
+			compiler := jsonschema.NewCompiler() // one fresh compiler per tool
+			schemaURL := fmt.Sprintf("inmem://safe-outputs-tools/%s.json", t.Name) // synthetic URL, used as the key for AddResource and Compile
+			if err := compiler.AddResource(schemaURL, schemaDoc); err != nil {
+				compiledToolSchemasErr = fmt.Errorf("failed to add schema resource for tool %q: %w", t.Name, err)
+				return
+			}
+			schema, err := compiler.Compile(schemaURL)
+			if err != nil {
+				compiledToolSchemasErr = fmt.Errorf("failed to compile inputSchema for tool %q: %w", t.Name, err)
+				return
+			}
+			out[t.Name] = schema
+		}
+		compiledToolSchemas = out // assigned only when every tool compiled; on any error the map stays nil
+	})
+	return compiledToolSchemas, compiledToolSchemasErr
+}
+
+// validateSafeOutputsSamples validates every `samples` entry on every
+// enabled safe-output handler against the corresponding MCP tool's inputSchema.
+// Sample sidecar fields (e.g. `patch`) are stripped before validation. Returns
+// the first error encountered; iteration order is deterministic (sorted by
+// struct field name) so error messages are stable. A nil config is valid.
+func validateSafeOutputsSamples(config *SafeOutputsConfig) error {
+	if config == nil {
+		return nil
+	}
+
+	fieldNames := make([]string, 0, len(safeOutputFieldMapping))
+	for fieldName := range safeOutputFieldMapping {
+		fieldNames = append(fieldNames, fieldName)
+	}
+	sort.Strings(fieldNames) // Go map iteration order is unspecified; sorting fixes which error is reported first
+
+	for _, fieldName := range fieldNames {
+		toolName := safeOutputFieldMapping[fieldName]
+		base := extractBaseSafeOutputConfig(config, fieldName)
+		if base == nil || len(base.Samples) == 0 {
+			continue // handler unset, no embedded base config, or no samples declared
+		}
+		if err := validateSamplesForTool(toolName, base.Samples); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// extractBaseSafeOutputConfig returns a pointer to the embedded
+// BaseSafeOutputConfig of the non-nil safe-output config at
+// SafeOutputsConfig.<fieldName>, or nil if the field is unset or has no such embed.
+func extractBaseSafeOutputConfig(config *SafeOutputsConfig, fieldName string) *BaseSafeOutputConfig {
+	field, ok := safeOutputPointerFieldValue(config, fieldName) // NOTE(review): presumably the reflect.Value of the pointer-typed field — confirm
+	if !ok || field.IsNil() {
+		return nil
+	}
+	elem := field.Elem()
+	if elem.Kind() != reflect.Struct {
+		return nil
+	}
+	baseField := elem.FieldByName("BaseSafeOutputConfig")
+	if !baseField.IsValid() || !baseField.CanAddr() { // Addr() below requires an addressable value
+		return nil
+	}
+	if base, ok := baseField.Addr().Interface().(*BaseSafeOutputConfig); ok {
+		return base
+	}
+	return nil // a field with that name exists but has a different type
+}
+
+// validateSamplesForTool validates each sample against the named MCP tool's
+// inputSchema after stripping recognized sidecar fields; the first failure wins.
+func validateSamplesForTool(toolName string, samples []map[string]any) error {
+	schemas, err := getCompiledToolSchemas()
+	if err != nil {
+		return err
+	}
+	schema, found := schemas[toolName]
+	if !found {
+		return fmt.Errorf("samples: no MCP tool schema found for %q (yaml key %q). Available tools come from pkg/workflow/js/safe_outputs_tools.json", toolName, toolDisplayKey(toolName))
+	}
+	displayKey := toolDisplayKey(toolName)
+	sidecars := sampleSidecarFields[toolName] // nil for tools without sidecars; samples are then validated as-is
+	for i, sample := range samples {
+		stripped := stripSidecarFields(sample, sidecars)
+		if err := schema.Validate(stripped); err != nil {
+			return fmt.Errorf("safe-outputs.%s.samples[%d]: %w", displayKey, i, err) // i is the zero-based index within this handler's samples
+		}
+	}
+	return nil
+}
+
+// stripSidecarFields returns sample without its sidecar keys, as a shallow copy;
+// with no sidecars it returns sample itself. The input map is never modified.
+func stripSidecarFields(sample map[string]any, sidecars map[string]bool) map[string]any {
+	if len(sidecars) == 0 {
+		return sample // no copy here: the caller gets the original map back
+	}
+	out := make(map[string]any, len(sample))
+	for k, v := range sample {
+		if sidecars[k] {
+			continue
+		}
+		out[k] = v // values are shared with sample, not deep-copied
+	}
+	return out
+}
+
+// toolDisplayKey converts a snake_case MCP tool name into the hyphenated YAML
+// frontmatter key (e.g. "create_pull_request" -> "create-pull-request").
+func toolDisplayKey(toolName string) string {
+	return strings.ReplaceAll(toolName, "_", "-") // derived textually, not looked up in any key table
+}
diff --git a/pkg/workflow/samples_validation_test.go b/pkg/workflow/samples_validation_test.go
new file mode 100644
index 00000000000..394d35d665d
--- /dev/null
+++ b/pkg/workflow/samples_validation_test.go
@@ -0,0 +1,170 @@
+package workflow
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestValidateSafeOutputsSamples_Valid covers the happy path for the
+// strict schema validation of samples entries. We use create_issue (no
+// sidecars, just title/body) and create_pull_request (with the `patch` sidecar
+// that must be stripped before validation).
+func TestValidateSafeOutputsSamples_Valid(t *testing.T) {
+	cfg := &SafeOutputsConfig{
+		CreateIssues: &CreateIssuesConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{
+						"title": "Sample issue",
+						"body":  "Sample body",
+					},
+				},
+			},
+		},
+		CreatePullRequests: &CreatePullRequestsConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{
+						"title":  "Sample PR",
+						"body":   "Sample PR body",
+						"branch": "gh-aw-sample-pr",
+						// patch is a sidecar — must be stripped before validation
+						// and must NOT cause an `additionalProperties` failure.
+						"patch": "diff --git a/foo b/foo\nnew file mode 100644\n--- /dev/null\n+++ b/foo\n@@ -0,0 +1 @@\n+hi\n",
+					},
+				},
+			},
+		},
+	}
+	if err := validateSafeOutputsSamples(cfg); err != nil { // a single call validates both handlers
+		t.Fatalf("expected no validation error, got: %v", err)
+	}
+}
+
+// TestValidateSafeOutputsSamples_MissingRequired verifies that omitting a
+// required field (title) yields an error naming the YAML key and sample index.
+func TestValidateSafeOutputsSamples_MissingRequired(t *testing.T) {
+	cfg := &SafeOutputsConfig{
+		CreateIssues: &CreateIssuesConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{
+						// title intentionally missing
+						"body": "Body without title",
+					},
+				},
+			},
+		},
+	}
+	err := validateSafeOutputsSamples(cfg)
+	if err == nil {
+		t.Fatal("expected validation error for missing title, got nil")
+	}
+	msg := err.Error() // only the prefix is pinned; the schema library's own wording is not asserted
+	if !strings.Contains(msg, "create-issue") {
+		t.Errorf("expected error to reference the YAML key `create-issue`, got: %s", msg)
+	}
+	if !strings.Contains(msg, "samples[0]") {
+		t.Errorf("expected error to reference `samples[0]`, got: %s", msg)
+	}
+}
+
+// TestValidateSafeOutputsSamples_SidecarStripped verifies that the `patch`
+// sidecar is stripped before validation, so a create_pull_request sample
+// carrying title/body/branch PLUS a patch validates cleanly.
+func TestValidateSafeOutputsSamples_SidecarStripped(t *testing.T) {
+	cfg := &SafeOutputsConfig{
+		CreatePullRequests: &CreatePullRequestsConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{
+						"title":  "PR",
+						"body":   "PR body",
+						"branch": "gh-aw-x",
+						"patch":  "diff --git a/x b/x\n", // only the diff header; validation never inspects the patch content
+					},
+				},
+			},
+		},
+	}
+	if err := validateSafeOutputsSamples(cfg); err != nil {
+		t.Fatalf("expected sidecar to be stripped and validation to pass, got: %v", err)
+	}
+}
+
+// TestCollectSampleEntries_DeterministicOrdering verifies that entries are
+// emitted in a stable order across calls (sorted by SafeOutputsConfig field name)
+// so that compiled YAML is deterministic.
+func TestCollectSampleEntries_DeterministicOrdering(t *testing.T) {
+	cfg := &SafeOutputsConfig{
+		CreateIssues: &CreateIssuesConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{"title": "A", "body": "A"},
+				},
+			},
+		},
+		AddComments: &AddCommentsConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{"body": "comment-A"},
+				},
+			},
+		},
+	}
+	first := collectSampleEntries(cfg) // same config collected twice within one process
+	second := collectSampleEntries(cfg)
+
+	if len(first) != 2 {
+		t.Fatalf("expected 2 entries, got %d", len(first))
+	}
+	if first[0].Tool != second[0].Tool || first[1].Tool != second[1].Tool {
+		t.Errorf("expected deterministic ordering across runs, got first=%v second=%v", first, second)
+	}
+	// Sorted by struct field name: AddComments < CreateIssues.
+	if first[0].Tool != "add_comment" {
+		t.Errorf("expected first entry tool to be add_comment (alphabetical struct field order), got %q", first[0].Tool)
+	}
+	if first[1].Tool != "create_issue" {
+		t.Errorf("expected second entry tool to be create_issue, got %q", first[1].Tool)
+	}
+}
+
+// TestCollectSampleEntries_SidecarPartitioning verifies that sidecar fields
+// land in Sidecars (not Arguments) so the driver knows what to pre-stage.
+func TestCollectSampleEntries_SidecarPartitioning(t *testing.T) {
+	cfg := &SafeOutputsConfig{
+		CreatePullRequests: &CreatePullRequestsConfig{
+			BaseSafeOutputConfig: BaseSafeOutputConfig{
+				Samples: []map[string]any{
+					{
+						"title":  "PR",
+						"body":   "Body",
+						"branch": "br",
+						"patch":  "diff --git a/x b/x\n",
+					},
+				},
+			},
+		},
+	}
+	entries := collectSampleEntries(cfg)
+	if len(entries) != 1 {
+		t.Fatalf("expected 1 entry, got %d", len(entries))
+	}
+	e := entries[0]
+	if e.Tool != "create_pull_request" {
+		t.Errorf("expected tool create_pull_request, got %q", e.Tool)
+	}
+	if _, hasPatchInArgs := e.Arguments["patch"]; hasPatchInArgs {
+		t.Error("expected patch to be stripped from Arguments")
+	}
+	if e.Arguments["title"] != "PR" || e.Arguments["body"] != "Body" || e.Arguments["branch"] != "br" {
+		t.Errorf("expected title/body/branch to remain in Arguments, got %#v", e.Arguments)
+	}
+	if e.Sidecars == nil {
+		t.Fatal("expected Sidecars to be non-nil")
+	}
+	if patch, ok := e.Sidecars["patch"].(string); !ok || !strings.HasPrefix(patch, "diff --git") { // the sidecar must survive as a string
+		t.Errorf("expected patch to be present in Sidecars as a git diff string, got %#v", e.Sidecars["patch"])
+	}
+}
diff --git a/pkg/workflow/workflow_builder.go b/pkg/workflow/workflow_builder.go
index 762de84d3ea..2a87ff848a3 100644
--- a/pkg/workflow/workflow_builder.go
+++ b/pkg/workflow/workflow_builder.go
@@ -67,6 +67,7 @@ func (c *Compiler) buildInitialWorkflowData(
 		ToolsStartupTimeout:     toolsResult.toolsStartupTimeout,
 		TrialMode:               c.trialMode,
 		TrialLogicalRepo:        c.trialLogicalRepoSlug,
+		UseSamples:              c.useSamples,
 		StrictMode:              c.strictMode,
 		AllowActionRefs:         c.allowActionRefs,
 		ValidateAWFConfig:       !c.skipValidation,

From 310d9b8aaa34448a269310e145285bbdb8c0e0b8 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sat, 6 Jun 2026 17:43:18 +0100
Subject: [PATCH 2/8] feat(compile): force-disable threat-detection under
 --use-samples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The deterministic samples replay driver emits synthetic safe-outputs
purely to exercise downstream handlers in end-to-end tests. Running
the LLM-backed threat-detection job against those fabricated payloads
defeats determinism, costs tokens, and can spuriously flag the test
fixtures.

When --use-samples is set, extractSafeOutputsConfig now nils out
SafeOutputsConfig.ThreatDetection unconditionally — overriding both
the implicit default and any explicit threat-detection: true. The
override is logged.

Tests:
- new TestExtractSafeOutputsConfig_UseSamplesDisablesThreatDetection
  covers default mode (detection enabled), --use-samples + default
  (disabled), and --use-samples + explicit true (still disabled)
- TestUseSamplesReplacesAgentStep additionally asserts no detection:
  job appears in the compiled lock file
---
 pkg/workflow/safe_outputs_config.go           |  8 +++
 pkg/workflow/samples_replay_test.go           |  5 ++
 pkg/workflow/samples_threat_detection_test.go | 63 +++++++++++++++++++
 3 files changed, 76 insertions(+)
 create mode 100644 pkg/workflow/samples_threat_detection_test.go

diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go
index e3ad6fb024e..1a7541c8eef 100644
--- a/pkg/workflow/safe_outputs_config.go
+++ b/pkg/workflow/safe_outputs_config.go
@@ -694,6 +694,14 @@ func (c *Compiler) extractSafeOutputsConfig(frontmatter map[string]any) *SafeOut
 		}
 	}
 
+	// Force-disable threat detection when --use-samples is active: the replay driver
+	// emits synthetic outputs solely for deterministic end-to-end tests, and running
+	// an LLM-backed detection pass would defeat that determinism.
+	if config != nil && c.useSamples && config.ThreatDetection != nil {
+		safeOutputsConfigLog.Print("Disabling threat-detection because --use-samples is set")
+		config.ThreatDetection = nil
+	}
+
 	if config != nil {
 		safeOutputsConfigLog.Print("Successfully extracted safe-outputs configuration")
 	} else {
diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go
index 37096b20b00..b603ce1fd99 100644
--- a/pkg/workflow/samples_replay_test.go
+++ b/pkg/workflow/samples_replay_test.go
@@ -94,5 +94,10 @@ Trivial workflow whose only job is to be compiled with --use-samples.
 		if !strings.Contains(lockContent, "id: agentic_execution") {
 			t.Error("Expected id: agentic_execution on the replay step")
 		}
+		// Threat detection must be force-disabled under --use-samples so the
+		// deterministic replay isn't perturbed by an LLM-backed detection job.
+		if strings.Contains(lockContent, "\n  detection:\n") {
+			t.Error("Expected no `detection:` job under --use-samples")
+		}
 	})
 }
diff --git a/pkg/workflow/samples_threat_detection_test.go b/pkg/workflow/samples_threat_detection_test.go
new file mode 100644
index 00000000000..ba7c082d2be
--- /dev/null
+++ b/pkg/workflow/samples_threat_detection_test.go
@@ -0,0 +1,63 @@
+package workflow
+
+import "testing"
+
+// TestExtractSafeOutputsConfig_UseSamplesDisablesThreatDetection verifies
+// that --use-samples force-disables threat detection so the deterministic
+// replay isn't perturbed by an LLM-backed detection job.
+func TestExtractSafeOutputsConfig_UseSamplesDisablesThreatDetection(t *testing.T) {
+	frontmatter := map[string]any{ // no threat-detection key: exercises the implicit default
+		"safe-outputs": map[string]any{
+			"create-issue": map[string]any{
+				"samples": []any{
+					map[string]any{"title": "x", "body": "y"},
+				},
+			},
+		},
+	}
+
+	t.Run("default mode applies threat-detection", func(t *testing.T) {
+		c := NewCompiler()
+		cfg := c.extractSafeOutputsConfig(frontmatter)
+		if cfg == nil {
+			t.Fatal("expected non-nil SafeOutputsConfig")
+		}
+		if cfg.ThreatDetection == nil {
+			t.Fatal("expected default threat-detection to be applied in default mode")
+		}
+	})
+
+	t.Run("use-samples disables threat-detection (default)", func(t *testing.T) {
+		c := NewCompiler()
+		c.SetUseSamples(true)
+		cfg := c.extractSafeOutputsConfig(frontmatter)
+		if cfg == nil {
+			t.Fatal("expected non-nil SafeOutputsConfig")
+		}
+		if cfg.ThreatDetection != nil {
+			t.Fatal("expected threat-detection to be force-disabled under --use-samples")
+		}
+	})
+
+	t.Run("use-samples disables threat-detection (explicit true)", func(t *testing.T) {
+		fm := map[string]any{ // same shape as frontmatter, plus an explicit threat-detection: true
+			"safe-outputs": map[string]any{
+				"threat-detection": true,
+				"create-issue": map[string]any{
+					"samples": []any{
+						map[string]any{"title": "x", "body": "y"},
+					},
+				},
+			},
+		}
+		c := NewCompiler()
+		c.SetUseSamples(true)
+		cfg := c.extractSafeOutputsConfig(fm)
+		if cfg == nil {
+			t.Fatal("expected non-nil SafeOutputsConfig")
+		}
+		if cfg.ThreatDetection != nil {
+			t.Fatal("expected explicit threat-detection: true to be force-disabled under --use-samples")
+		}
+	})
+}

From 9284a627c7d97f47933bab183c182d190e7e215e Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sat, 6 Jun 2026 18:01:53 +0100
Subject: [PATCH 3/8] test(samples): cover preStagePatch end-to-end for
 create_pull_request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds three vitest specs that drive the apply_samples driver's
preStagePatch path against a real, throwaway git working tree:

1. create_pull_request with a 'patch' sidecar checks out the
   requested branch, applies the diff, and commits it — and the
   resulting diff is visible via 'git diff main...<branch>', which
   is precisely what the downstream MCP create_pull_request handler
   reads when generating its bundle/patch payload.

2. push_to_pull_request_branch without an explicit 'branch' falls
   back to 'gh-aw-sample-<i+1>' and still applies the patch.

3. preStagePatch is a no-op when called with a tool that has no
   patch sidecar (defense in depth around the PATCH_SIDECAR_TOOLS
   gate in main()).

Together with the existing Go unit tests for sidecar partitioning
and schema-stripping, this closes the testing gap around the
patch-sidecar flow that was previously only covered structurally.
---
 actions/setup/js/apply_samples.test.cjs | 143 ++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs
index a2210c54761..1546b8435de 100644
--- a/actions/setup/js/apply_samples.test.cjs
+++ b/actions/setup/js/apply_samples.test.cjs
@@ -14,6 +14,7 @@
 
 import { describe, it, expect, beforeAll } from "vitest";
 import { spawnSync } from "child_process";
+import { createRequire } from "module";
 import fs from "fs";
 import path from "path";
 import os from "os";
@@ -23,11 +24,29 @@ const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 
 const driverPath = path.join(__dirname, "apply_samples.cjs");
+const require = createRequire(import.meta.url);
 
 function makeTempDir(prefix) {
   return fs.mkdtempSync(path.join(os.tmpdir(), prefix));
 }
 
+function git(args, cwd) {
+  const r = spawnSync("git", args, { cwd, encoding: "utf8" }); // synchronous; the encoding option makes stdout/stderr strings
+  if (r.status !== 0) {
+    throw new Error(`git ${args.join(" ")} failed: ${r.stderr || r.stdout}`); // also reached when status is null (spawn failure or signal)
+  }
+  return r.stdout;
+}
+
+function initRepo(dir, defaultBranch) {
+  git(["init", "-q", "-b", defaultBranch], dir); // NOTE(review): -b/--initial-branch requires git >= 2.28
+  git(["config", "user.email", "ghaw-test@example.com"], dir); // repo-local identity, so committing does not depend on global git config
+  git(["config", "user.name", "ghaw test"], dir);
+  fs.writeFileSync(path.join(dir, "README.md"), "# seed\n");
+  git(["add", "."], dir);
+  git(["commit", "-q", "-m", "seed"], dir); // single "seed" commit on defaultBranch; the no-op spec asserts on this subject
+}
+
 describe.sequential("apply_samples.cjs", () => {
   let tempDir;
   let configPath;
@@ -113,3 +132,127 @@ describe.sequential("apply_samples.cjs", () => {
     expect(logText).toContain("terminal_reason");
   });
 });
+
+describe("apply_samples.cjs preStagePatch (create_pull_request / push_to_pull_request_branch)", () => {
+  // Load the module under test directly so we can drive preStagePatch in
+  // isolation against a real, throwaway git working tree. This is the
+  // critical code path that turns a `patch` sidecar on a sample entry into
+  // a real branch + commit that the downstream MCP `create_pull_request`
+  // handler (which derives a git diff) can act on.
+  const { preStagePatch } = require("./apply_samples.cjs"); // NOTE(review): assumes requiring the driver does not execute main() — confirm
+
+  /**
+   * Build a unified diff that adds a brand-new file. The index hashes are placeholders.
+   */
+  function newFileDiff(filePath, contents) {
+    const lines = contents.split("\n");
+    // Strip trailing empty element produced by a terminating "\n" so the
+    // hunk header line count matches what git apply expects.
+    if (lines[lines.length - 1] === "") lines.pop();
+    const body = lines.map(l => "+" + l).join("\n");
+    return `diff --git a/${filePath} b/${filePath}\n` + `new file mode 100644\n` + `index 0000000..1111111\n` + `--- /dev/null\n` + `+++ b/${filePath}\n` + `@@ -0,0 +1,${lines.length} @@\n` + body + "\n";
+  }
+
+  it("checks out the requested branch and commits the patch on it (create_pull_request)", () => {
+    const workspace = makeTempDir("gh-aw-prestage-cpr-"); // NOTE(review): never removed; consider fs.rmSync in a finally/afterEach
+    initRepo(workspace, "main");
+
+    const branchName = "feat/gh-aw-sample-branch";
+    const fileToAdd = "sample-feature.txt";
+    const fileBody = "hello from a deterministic sample\nsecond line\n";
+    const entry = {
+      tool: "create_pull_request",
+      arguments: {
+        title: "Sample PR",
+        body: "Sample PR body",
+        branch: branchName,
+      },
+      sidecars: { patch: newFileDiff(fileToAdd, fileBody) },
+    };
+
+    // GH_AW_CUSTOM_BASE_BRANCH steers preStagePatch to check out the right
+    // base ref inside our fresh repo (default is GITHUB_BASE_REF / "main").
+    const prev = process.env.GH_AW_CUSTOM_BASE_BRANCH; // saved so the finally block can restore the caller's environment
+    process.env.GH_AW_CUSTOM_BASE_BRANCH = "main";
+    try {
+      preStagePatch(entry, 0, workspace);
+    } finally {
+      if (prev === undefined) delete process.env.GH_AW_CUSTOM_BASE_BRANCH;
+      else process.env.GH_AW_CUSTOM_BASE_BRANCH = prev;
+    }
+
+    // 1. Branch name on the entry is preserved (driver must forward it to MCP).
+    expect(entry.arguments.branch).toBe(branchName);
+
+    // 2. The named branch exists in the working repo.
+    const branches = git(["branch", "--list", branchName], workspace).trim();
+    expect(branches).toContain(branchName);
+
+    // 3. Current HEAD is that branch.
+    const head = git(["rev-parse", "--abbrev-ref", "HEAD"], workspace).trim();
+    expect(head).toBe(branchName);
+
+    // 4. The patch was applied AND committed (not just sitting in the worktree).
+    const status = git(["status", "--porcelain"], workspace).trim();
+    expect(status).toBe("");
+    expect(fs.existsSync(path.join(workspace, fileToAdd))).toBe(true);
+    expect(fs.readFileSync(path.join(workspace, fileToAdd), "utf8")).toBe(fileBody);
+
+    // 5. The commit message identifies the sample so failures are diagnosable.
+    const lastMsg = git(["log", "-1", "--pretty=%s"], workspace).trim();
+    expect(lastMsg).toMatch(/gh-aw sample 1: create_pull_request/);
+
+    // 6. The new file shows up as a real diff against the base branch — this is
+    // precisely what the downstream MCP create_pull_request handler will read.
+    const diff = git(["diff", "main..." + branchName, "--", fileToAdd], workspace);
+    expect(diff).toContain("+hello from a deterministic sample");
+  });
+
+  it("defaults the branch name to gh-aw-sample-<i+1> when none is supplied", () => {
+    const workspace = makeTempDir("gh-aw-prestage-default-"); // NOTE(review): never removed, same as the spec above
+    initRepo(workspace, "main");
+
+    const entry = {
+      tool: "push_to_pull_request_branch",
+      arguments: {
+        body: "Sample push body",
+        // branch intentionally omitted — driver should synthesize one.
+      },
+      sidecars: { patch: newFileDiff("push-feature.txt", "from push sample\n") },
+    };
+
+    const prev = process.env.GH_AW_CUSTOM_BASE_BRANCH;
+    process.env.GH_AW_CUSTOM_BASE_BRANCH = "main";
+    try {
+      preStagePatch(entry, 2, workspace);
+    } finally {
+      if (prev === undefined) delete process.env.GH_AW_CUSTOM_BASE_BRANCH;
+      else process.env.GH_AW_CUSTOM_BASE_BRANCH = prev;
+    }
+
+    // Index in preStagePatch is zero-based; the default uses i+1 → "gh-aw-sample-3".
+    expect(entry.arguments.branch).toBe("gh-aw-sample-3"); // the synthesized name is written back onto the entry
+    const head = git(["rev-parse", "--abbrev-ref", "HEAD"], workspace).trim();
+    expect(head).toBe("gh-aw-sample-3");
+    expect(fs.existsSync(path.join(workspace, "push-feature.txt"))).toBe(true);
+  });
+
+  it("is a no-op when the sample tool isn't in the patch-sidecar set", () => {
+    // We assert this at the driver level (PATCH_SIDECAR_TOOLS gate in main()),
+    // but preStagePatch itself should also be a no-op when called with an
+    // entry that has no patch sidecar — protecting against misuse.
+    const workspace = makeTempDir("gh-aw-prestage-noop-");
+    initRepo(workspace, "main");
+
+    const entry = {
+      tool: "create_issue",
+      arguments: { title: "x", body: "y" },
+    }; // no `sidecars` key at all: this exercises the missing-sidecar path, not the tool-name gate
+    preStagePatch(entry, 0, workspace);
+
+    // Still on main, no extra commits, no new files.
+    expect(git(["rev-parse", "--abbrev-ref", "HEAD"], workspace).trim()).toBe("main");
+    const log = git(["log", "--pretty=%s"], workspace).trim().split("\n");
+    expect(log).toEqual(["seed"]);
+  });
+});

From e262eb36292310d14763acbf51fb7b18b9baf774 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sat, 6 Jun 2026 18:04:24 +0100
Subject: [PATCH 4/8] test(samples): e2e smoke test for create-pull-request +
 patch sidecar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiles a workflow whose only safe-output is `create-pull-request`
with a samples entry carrying a multi-line `patch:` block scalar,
then inspects the generated lock.yml. Extracts the GH_AW_SAMPLES
JSON literal block out of the compiled YAML and asserts:

- the agentic step is replaced by the replay step
- the entry tool is "create_pull_request"
- the patch is partitioned into sidecars, NOT arguments — the MCP
  create_pull_request handler must not receive a literal patch
  argument; it derives the diff from the working tree
- title/body/branch are preserved in arguments
- the patch payload (including the diff header and the added line)
  survives YAML emission verbatim so the driver can git-apply it
- no detection: job is emitted

This closes the loop from frontmatter -> compiled YAML for the
patch-sidecar flow, complementing the existing Go unit tests
(sidecar partitioning) and the vitest preStagePatch specs (which
exercise the runtime side against a real git repo).
---
 pkg/workflow/samples_replay_test.go | 173 ++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)

diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go
index b603ce1fd99..609fd58be45 100644
--- a/pkg/workflow/samples_replay_test.go
+++ b/pkg/workflow/samples_replay_test.go
@@ -3,6 +3,7 @@
 package workflow
 
 import (
+	"encoding/json"
 	"os"
 	"strings"
 	"testing"
@@ -101,3 +102,175 @@ Trivial workflow whose only job is to be compiled with --use-samples.
 		}
 	})
 }
+
+// TestUseSamplesCreatePullRequestWithPatch is the end-to-end smoke test for
+// the create-pull-request + patch sidecar flow. It compiles a workflow whose
+// only safe-output is `create-pull-request` with a `samples` entry carrying
+// a `patch` sidecar, then inspects the generated lock.yml to verify that:
+//
+//  1. The agentic step is replaced by the deterministic replay step
+//  2. GH_AW_SAMPLES contains a JSON-encoded create_pull_request entry
+//  3. The patch is partitioned into `sidecars`, NOT into `arguments`
+//     (the MCP server's create_pull_request handler must NOT receive `patch`
+//     as a tool argument — it derives the diff from the working tree)
+//  4. The branch name and other PR fields land in `arguments`
+//  5. The diff payload survives YAML emission: its diff header and added line
+//     are both still present (so the driver can `git apply` it at replay time)
+//  6. No `detection:` job is emitted
+func TestUseSamplesCreatePullRequestWithPatch(t *testing.T) {
+	const patch = "diff --git a/sample.txt b/sample.txt\nnew file mode 100644\nindex 0000000..1111111\n--- /dev/null\n+++ b/sample.txt\n@@ -0,0 +1 @@\n+hello from gh-aw samples\n"
+
+	md := `---
+on:
+  workflow_dispatch:
+permissions: read-all
+engine:
+  id: claude
+safe-outputs:
+  create-pull-request:
+    samples:
+      - title: "Sample PR from gh-aw"
+        body: "PR body emitted by samples replay."
+        branch: "feat/gh-aw-sample-pr"
+        patch: |
+` + indentBlock(patch, "          ") + `---
+
+Trivial workflow exercising create-pull-request via --use-samples.
+`
+
+	tmpFile, err := os.CreateTemp("", "use-samples-cpr-*.md")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(tmpFile.Name())
+	if _, err := tmpFile.WriteString(md); err != nil {
+		t.Fatal(err)
+	}
+	tmpFile.Close()
+
+	compiler := NewCompiler()
+	compiler.SetUseSamples(true)
+	if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil {
+		t.Fatalf("compile failed: %v", err)
+	}
+	lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml"
+	defer os.Remove(lockPath)
+	b, err := os.ReadFile(lockPath)
+	if err != nil {
+		t.Fatalf("read lock: %v", err)
+	}
+	lock := string(b)
+
+	// 1. Agentic step replaced
+	if !strings.Contains(lock, "Replay safe-outputs samples (deterministic)") {
+		t.Error("Expected `Replay safe-outputs samples (deterministic)` step in lock file")
+	}
+	if !strings.Contains(lock, "apply_samples.cjs") {
+		t.Error("Expected lock file to invoke apply_samples.cjs driver")
+	}
+
+	// 2. GH_AW_SAMPLES contains a create_pull_request entry
+	if !strings.Contains(lock, "GH_AW_SAMPLES:") {
+		t.Fatal("Expected GH_AW_SAMPLES env var in lock file")
+	}
+	if !strings.Contains(lock, `"tool":"create_pull_request"`) {
+		t.Error("Expected JSON-encoded create_pull_request tool entry in lock file")
+	}
+
+	// Extract the GH_AW_SAMPLES JSON block from the YAML for structural assertions.
+	samplesJSON := extractGHAWSamplesJSON(t, lock)
+	var entries []map[string]any
+	if err := json.Unmarshal([]byte(samplesJSON), &entries); err != nil {
+		t.Fatalf("failed to parse GH_AW_SAMPLES JSON: %v\nRaw:\n%s", err, samplesJSON)
+	}
+	if len(entries) != 1 {
+		t.Fatalf("expected exactly one sample entry, got %d", len(entries))
+	}
+	entry := entries[0]
+
+	// 3. Patch is in sidecars, NOT in arguments
+	args, _ := entry["arguments"].(map[string]any)
+	sidecars, _ := entry["sidecars"].(map[string]any)
+	if args == nil {
+		t.Fatal("expected entry.arguments to be an object")
+	}
+	if _, hasPatchInArgs := args["patch"]; hasPatchInArgs {
+		t.Error("patch must be stripped from arguments — MCP create_pull_request handler must not receive it")
+	}
+	if sidecars == nil {
+		t.Fatal("expected entry.sidecars to be present (patch should land here)")
+	}
+	gotPatch, _ := sidecars["patch"].(string)
+	if gotPatch == "" {
+		t.Fatal("expected sidecars.patch to be a non-empty string")
+	}
+
+	// 4. PR fields preserved in arguments
+	if args["title"] != "Sample PR from gh-aw" {
+		t.Errorf("arguments.title = %q, want %q", args["title"], "Sample PR from gh-aw")
+	}
+	if args["body"] != "PR body emitted by samples replay." {
+		t.Errorf("arguments.body = %q, want %q", args["body"], "PR body emitted by samples replay.")
+	}
+	if args["branch"] != "feat/gh-aw-sample-pr" {
+		t.Errorf("arguments.branch = %q, want %q", args["branch"], "feat/gh-aw-sample-pr")
+	}
+
+	// 5. Patch payload preserved (spot-check the diff header and the added line)
+	if !strings.Contains(gotPatch, "diff --git a/sample.txt b/sample.txt") {
+		t.Errorf("sidecars.patch missing diff header; got: %q", gotPatch)
+	}
+	if !strings.Contains(gotPatch, "+hello from gh-aw samples") {
+		t.Errorf("sidecars.patch missing payload line; got: %q", gotPatch)
+	}
+
+	// 6. No detection job
+	if strings.Contains(lock, "\n  detection:\n") {
+		t.Error("Expected no `detection:` job under --use-samples")
+	}
+}
+
+// indentBlock prefixes every line of s with prefix and ends the result with exactly
+// one newline. Used to embed a multi-line patch under a YAML block scalar in tests.
+func indentBlock(s, prefix string) string {
+	lines := strings.Split(strings.TrimRight(s, "\n"), "\n")
+	for i, line := range lines {
+		lines[i] = prefix + line
+	}
+	return strings.Join(lines, "\n") + "\n"
+}
+
+// extractGHAWSamplesJSON pulls the literal block scalar value of GH_AW_SAMPLES
+// out of the compiled YAML and returns the unindented, whitespace-trimmed JSON
+// text. Hand-rolled (no YAML parser) because the emit format is tightly controlled.
+func extractGHAWSamplesJSON(t *testing.T, lock string) string {
+	t.Helper()
+	const marker = "GH_AW_SAMPLES: |\n"
+	start := strings.Index(lock, marker)
+	if start < 0 {
+		t.Fatalf("could not find %q in lock file", marker)
+	}
+	start += len(marker)
+	// Determine indentation from the first content line.
+	rest := lock[start:]
+	firstNL := strings.Index(rest, "\n")
+	if firstNL < 0 {
+		t.Fatal("malformed GH_AW_SAMPLES block: no newline after first line")
+	}
+	firstLine := rest[:firstNL]
+	indent := firstLine[:len(firstLine)-len(strings.TrimLeft(firstLine, " "))]
+	if indent == "" {
+		t.Fatal("malformed GH_AW_SAMPLES block: expected indented content")
+	}
+	// Collect lines until we hit one that no longer starts with the same indent
+	// (e.g. the next, less-indented YAML key such as GH_AW_AGENT_STDIO_LOG).
+	var out strings.Builder
+	for _, line := range strings.Split(rest, "\n") {
+		if !strings.HasPrefix(line, indent) {
+			break
+		}
+		out.WriteString(strings.TrimPrefix(line, indent))
+		out.WriteString("\n")
+	}
+	return strings.TrimSpace(out.String())
+}

From 5194f4bef2915b932ff63ab5e8a3b250760d2bd0 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sat, 6 Jun 2026 19:42:17 +0100
Subject: [PATCH 5/8] fix(samples): emit `[]` not `null` when no samples
 configured

Observed in CI:
  Error: apply_samples: GH_AW_SAMPLES must be a JSON array
    at loadSamples (apply_samples.cjs:61:11)

Root cause: when a workflow opts into --use-samples but configures no
`samples:` entries (or configures them only on disabled handlers),
collectSampleEntries returns a nil Go slice. json.Marshal of a nil slice
produces the literal string "null", which the driver rightly refuses to
treat as an array.

Compiler fix (pkg/workflow/samples_replay.go): normalize a nil entries
slice to an empty []SampleEntry{} before marshaling so GH_AW_SAMPLES is
always emitted as a valid JSON array ("[]" in the empty case).

Driver defense (actions/setup/js/apply_samples.cjs): also tolerate a
literal JSON `null` payload and treat it as "no samples to replay",
so an older compiler against a newer driver doesn't crash either.

Tests:
- new Go integration test TestUseSamplesEmitsEmptyArrayWhenNoSamplesConfigured
  compiles a workflow that uses --use-samples with safe-outputs but no
  samples entries, then asserts GH_AW_SAMPLES is exactly "[]" (and
  emphatically not "null")
- new vitest spec verifies the driver exits 0 on GH_AW_SAMPLES="null"
  and logs "GH_AW_SAMPLES is null"
---
 actions/setup/js/apply_samples.cjs      |  6 +++
 actions/setup/js/apply_samples.test.cjs | 25 ++++++++++
 pkg/workflow/samples_replay.go          |  9 ++++
 pkg/workflow/samples_replay_test.go     | 62 +++++++++++++++++++++++++
 4 files changed, 102 insertions(+)

diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs
index 73311b28c0e..ce54bf29e07 100644
--- a/actions/setup/js/apply_samples.cjs
+++ b/actions/setup/js/apply_samples.cjs
@@ -57,6 +57,12 @@ function loadSamples() {
   } catch (err) {
     throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${/** @type {Error} */ err.message}`);
   }
+  // Tolerate a literal JSON `null` payload (older compiler emitted it for
+  // workflows with --use-samples but no `samples:` entries). Treat as empty.
+  if (parsed === null) {
+    console.error("apply_samples: GH_AW_SAMPLES is null — treating as no samples to replay.");
+    return [];
+  }
   if (!Array.isArray(parsed)) {
     throw new Error("apply_samples: GH_AW_SAMPLES must be a JSON array");
   }
diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs
index 1546b8435de..9b2963bdead 100644
--- a/actions/setup/js/apply_samples.test.cjs
+++ b/actions/setup/js/apply_samples.test.cjs
@@ -131,6 +131,31 @@ describe.sequential("apply_samples.cjs", () => {
     const logText = fs.readFileSync(path.join(tempDir, "empty-log.log"), "utf8");
     expect(logText).toContain("terminal_reason");
   });
+
+  // Defense in depth: an older compiler that marshaled a nil Go slice would
+  // emit `null` into GH_AW_SAMPLES. The driver must treat that as "no samples"
+  // (exit 0 and still write the stdio log), not crash with `must be a JSON array`.
+  it("exits cleanly when GH_AW_SAMPLES is the literal `null`", () => {
+    const logPath = path.join(tempDir, "null-log.log");
+    const result = spawnSync(process.execPath, [driverPath], {
+      env: {
+        ...process.env,
+        GH_AW_SAMPLES: "null",
+        GH_AW_SAFE_OUTPUTS_CONFIG_PATH: configPath,
+        GH_AW_SAFE_OUTPUTS: outputsPath,
+        GH_AW_AGENT_STDIO_LOG: logPath,
+      },
+      encoding: "utf8",
+      timeout: 10000,
+    });
+
+    if (result.status !== 0) {
+      throw new Error(`driver exited with status ${result.status}\nstderr:\n${result.stderr}\nstdout:\n${result.stdout}`);
+    }
+    expect(result.stderr).toContain("GH_AW_SAMPLES is null");
+    const logText = fs.readFileSync(logPath, "utf8");
+    expect(logText).toContain("terminal_reason");
+  });
 });
 
 describe("apply_samples.cjs preStagePatch (create_pull_request / push_to_pull_request_branch)", () => {
diff --git a/pkg/workflow/samples_replay.go b/pkg/workflow/samples_replay.go
index c0f49d804a8..fec6a477f6b 100644
--- a/pkg/workflow/samples_replay.go
+++ b/pkg/workflow/samples_replay.go
@@ -75,6 +75,15 @@ func (c *Compiler) generateSamplesReplayStep(yaml *strings.Builder, data *Workfl
 	entries := collectSampleEntries(data.SafeOutputs)
 	compilerYamlLog.Printf("Generating samples replay step: entries=%d", len(entries))
 
+	// Normalize a nil slice to an empty slice so json.Marshal emits "[]" not "null".
+	// Drivers without the `null` tolerance reject anything that isn't a JSON array;
+	// emitting "null" here would crash their replay step with `GH_AW_SAMPLES must
+	// be a JSON array` for workflows that opt into --use-samples but configure no
+	// samples (or whose configured samples all live on disabled handlers).
+	if entries == nil {
+		entries = []SampleEntry{}
+	}
+
 	// Serialize entries to JSON for the driver. Always emit valid JSON even when
 	// empty so the driver can produce a clear `no samples configured` message
 	// rather than crashing on an empty env var.
diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go
index 609fd58be45..df1ac109d3f 100644
--- a/pkg/workflow/samples_replay_test.go
+++ b/pkg/workflow/samples_replay_test.go
@@ -274,3 +274,65 @@ func extractGHAWSamplesJSON(t *testing.T, lock string) string {
 	}
 	return strings.TrimSpace(out.String())
 }
+
+// TestUseSamplesEmitsEmptyArrayWhenNoSamplesConfigured guards against a
+// regression where compiling with --use-samples but no `samples:` entries on
+// any enabled handler caused json.Marshal of a nil Go slice to emit the
+// literal string "null" into GH_AW_SAMPLES, which the driver rightly
+// rejected with `GH_AW_SAMPLES must be a JSON array`. The compiler must
+// emit "[]" instead so the driver can exit cleanly with `nothing to
+// replay`.
+func TestUseSamplesEmitsEmptyArrayWhenNoSamplesConfigured(t *testing.T) {
+	// Workflow opts into --use-samples and configures safe-outputs but has
+	// no `samples:` entries on the create-issue handler.
+	const md = `---
+on:
+  workflow_dispatch:
+permissions: read-all
+engine:
+  id: claude
+safe-outputs:
+  create-issue:
+    title-prefix: "[no-samples] "
+---
+
+Workflow with safe-outputs but no samples — should still compile and
+emit a valid empty-array GH_AW_SAMPLES under --use-samples.
+`
+
+	tmpFile, err := os.CreateTemp("", "use-samples-empty-*.md")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(tmpFile.Name())
+	if _, err := tmpFile.WriteString(md); err != nil {
+		t.Fatal(err)
+	}
+	tmpFile.Close()
+
+	compiler := NewCompiler()
+	compiler.SetUseSamples(true)
+	if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil {
+		t.Fatalf("compile failed: %v", err)
+	}
+	lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml"
+	defer os.Remove(lockPath)
+	b, err := os.ReadFile(lockPath)
+	if err != nil {
+		t.Fatalf("read lock: %v", err)
+	}
+	lock := string(b)
+
+	// Must still emit the replay step.
+	if !strings.Contains(lock, "Replay safe-outputs samples (deterministic)") {
+		t.Fatal("Expected replay step in lock file even with no samples configured")
+	}
+
+	samplesJSON := extractGHAWSamplesJSON(t, lock)
+	if samplesJSON == "null" {
+		t.Fatalf("GH_AW_SAMPLES must not be the literal `null` (driver would reject it); got %q", samplesJSON)
+	}
+	if samplesJSON != "[]" {
+		t.Fatalf("GH_AW_SAMPLES = %q, want %q", samplesJSON, "[]")
+	}
+}

From 9253ebe1011eb14aeae0b9ce1639dd25bc8882f7 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sat, 6 Jun 2026 19:58:28 +0100
Subject: [PATCH 6/8] fix(samples): CI failures + review feedback

CI fixes:
- pkg/workflow/samples_replay.go: switch to strings.SplitSeq per
  the modernize linter (lint-go was failing)
- actions/setup/js/apply_samples.cjs: weaken the JSDoc type on
  sendJsonRpc's child parameter from ChildProcessWithoutNullStreams
  to ChildProcess so the value returned by spawn() with
  stdio: ["pipe", "pipe", "inherit"] (which has a null stderr)
  type-checks (js-typecheck was failing)

Review feedback (all Copilot inline comments):
- apply_samples.cjs: replace the /** @type {Error} */ casts on
  catch bindings with the shared getErrorMessage(err) helper so
  catch-unknown narrowing is actually safe under @ts-check
- samples_replay_test.go: stop swallowing the ReadFile error in
  the Use-Samples-Mode subtest; t.Fatalf on failure like the
  default-mode subtest does
- samples_validation.go: stripSidecarFields now always returns a
  fresh map, matching its doc comment (no more accidental aliasing
  of the caller's input when sidecars is empty)
- safe_outputs_config.go: drop the YAML-string branch of
  parseSamplesValue; the JSON schema for samples only allows
  array/object, so the string form would be rejected upstream
  before this code runs. Removes the now-unused yaml import.

The Copilot comment about collectSampleEntries emitting null was
addressed in the prior commit (5194f4bef2) which normalizes nil
to []SampleEntry{} before json.Marshal.
---
 actions/setup/js/apply_samples.cjs  |  7 ++++---
 pkg/workflow/safe_outputs_config.go | 24 +++++-------------------
 pkg/workflow/samples_replay.go      |  2 +-
 pkg/workflow/samples_replay_test.go |  5 ++++-
 pkg/workflow/samples_validation.go  |  6 ++----
 5 files changed, 16 insertions(+), 28 deletions(-)

diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs
index ce54bf29e07..c5d3f6924b2 100644
--- a/actions/setup/js/apply_samples.cjs
+++ b/actions/setup/js/apply_samples.cjs
@@ -29,6 +29,7 @@ const { spawn } = require("child_process");
 const fs = require("fs");
 const path = require("path");
 const os = require("os");
+const { getErrorMessage } = require("./error_helpers.cjs");
 
 const DEFAULT_BASE_BRANCH = process.env.GH_AW_CUSTOM_BASE_BRANCH || process.env.GITHUB_BASE_REF || process.env.GITHUB_REF_NAME || "main";
 const PATCH_SIDECAR_TOOLS = new Set(["create_pull_request", "push_to_pull_request_branch"]);
@@ -55,7 +56,7 @@ function loadSamples() {
   try {
     parsed = JSON.parse(raw);
   } catch (err) {
-    throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${/** @type {Error} */ err.message}`);
+    throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${getErrorMessage(err)}`);
   }
   // Tolerate a literal JSON `null` payload (older compiler emitted it for
   // workflows with --use-samples but no `samples:` entries). Treat as empty.
@@ -131,7 +132,7 @@ function preStagePatch(entry, index, workspace) {
   try {
     runGit(["checkout", DEFAULT_BASE_BRANCH], workspace);
   } catch (err) {
-    console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${/** @type {Error} */ err.message}; staying on current HEAD`);
+    console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${getErrorMessage(err)}; staying on current HEAD`);
   }
 
   // Create the branch (or check it out if it already exists from a previous sample).
@@ -159,7 +160,7 @@ function preStagePatch(entry, index, workspace) {
 /**
  * Send a single JSON-RPC request to the MCP server child process and resolve
  * with the parsed JSON response (or reject on timeout).
- * @param {import("child_process").ChildProcessWithoutNullStreams} child
+ * @param {import("child_process").ChildProcess} child
  * @param {NodeJS.WritableStream} stdin
  * @param {object} request
  * @param {AsyncIterableIterator<string>} responseIterator
diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go
index 1a7541c8eef..3899b7c28e2 100644
--- a/pkg/workflow/safe_outputs_config.go
+++ b/pkg/workflow/safe_outputs_config.go
@@ -9,7 +9,6 @@ import (
 	"github.com/github/gh-aw/pkg/logger"
 	"github.com/github/gh-aw/pkg/sliceutil"
 	"github.com/github/gh-aw/pkg/typeutil"
-	"go.yaml.in/yaml/v3"
 )
 
 var safeOutputsConfigLog = logger.New("workflow:safe_outputs_config")
@@ -767,8 +766,8 @@ func (c *Compiler) parseBaseSafeOutputConfig(configMap map[string]any, config *B
 
 	// Parse samples list (hidden feature: deterministic replay samples for --use-samples).
 	// Accepts either a YAML list of objects, or a single object that is auto-wrapped
-	// into a one-element list, or a YAML string scalar containing a list (for
-	// authoring convenience with `|` block scalars in frontmatter).
+	// into a one-element list. The JSON schema rejects scalar/string shapes so we
+	// don't need a defensive YAML-string branch here.
 	if samples, exists := configMap["samples"]; exists {
 		parsed := parseSamplesValue(samples)
 		if len(parsed) > 0 {
@@ -779,14 +778,12 @@ func (c *Compiler) parseBaseSafeOutputConfig(configMap map[string]any, config *B
 }
 
 // parseSamplesValue normalizes a `samples` frontmatter value into a list of
-// objects. Accepted shapes (most-permissive first):
+// objects. Accepted shapes:
 //   - YAML list of mappings: returned as-is
 //   - single YAML mapping: wrapped into a one-element list
-//   - YAML string containing a list/mapping (authoring with `|` block scalar):
-//     parsed as YAML and re-normalized
 //
-// Any other shape returns an empty slice — schema validation will then report
-// "no samples found".
+// Any other shape returns an empty slice — schema validation rejects those
+// shapes upstream and we keep this parser strict to match.
 func parseSamplesValue(samples any) []map[string]any {
 	switch v := samples.(type) {
 	case []any:
@@ -805,17 +802,6 @@ func parseSamplesValue(samples any) []map[string]any {
 		return out
 	case map[string]any:
 		return []map[string]any{v}
-	case string:
-		trimmed := strings.TrimSpace(v)
-		if trimmed == "" {
-			return nil
-		}
-		var nested any
-		if err := yaml.Unmarshal([]byte(trimmed), &nested); err != nil {
-			safeOutputsConfigLog.Printf("Failed to parse samples string as YAML: %v", err)
-			return nil
-		}
-		return parseSamplesValue(nested)
 	default:
 		return nil
 	}
diff --git a/pkg/workflow/samples_replay.go b/pkg/workflow/samples_replay.go
index fec6a477f6b..28f16afe1c4 100644
--- a/pkg/workflow/samples_replay.go
+++ b/pkg/workflow/samples_replay.go
@@ -99,7 +99,7 @@ func (c *Compiler) generateSamplesReplayStep(yaml *strings.Builder, data *Workfl
 	yaml.WriteString("        id: agentic_execution\n")
 	yaml.WriteString("        env:\n")
 	yaml.WriteString("          GH_AW_SAMPLES: |\n")
-	for _, line := range strings.Split(string(payload), "\n") {
+	for line := range strings.SplitSeq(string(payload), "\n") {
 		fmt.Fprintf(yaml, "            %s\n", line)
 	}
 	fmt.Fprintf(yaml, "          GH_AW_AGENT_STDIO_LOG: %s\n", logFile)
diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go
index df1ac109d3f..ac8232fd825 100644
--- a/pkg/workflow/samples_replay_test.go
+++ b/pkg/workflow/samples_replay_test.go
@@ -75,7 +75,10 @@ Trivial workflow whose only job is to be compiled with --use-samples.
 		}
 		lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml"
 		defer os.Remove(lockPath)
-		b, _ := os.ReadFile(lockPath)
+		b, err := os.ReadFile(lockPath)
+		if err != nil {
+			t.Fatalf("read lock: %v", err)
+		}
 		lockContent := string(b)
 		if !strings.Contains(lockContent, "Replay safe-outputs samples (deterministic)") {
 			t.Error("Expected `Replay safe-outputs samples (deterministic)` step in lock file")
diff --git a/pkg/workflow/samples_validation.go b/pkg/workflow/samples_validation.go
index 79bfbc0f5f0..b04219aa23c 100644
--- a/pkg/workflow/samples_validation.go
+++ b/pkg/workflow/samples_validation.go
@@ -144,11 +144,9 @@ func validateSamplesForTool(toolName string, samples []map[string]any) error {
 }
 
 // stripSidecarFields returns a shallow copy of sample with sidecar keys removed.
-// The original map is not modified.
+// The original map is never modified, even when no sidecars are configured,
+// so callers may mutate the returned map without affecting their input.
 func stripSidecarFields(sample map[string]any, sidecars map[string]bool) map[string]any {
-	if len(sidecars) == 0 {
-		return sample
-	}
 	out := make(map[string]any, len(sample))
 	for k, v := range sample {
 		if sidecars[k] {

From a7798979d5e8f06a2e5beee9373ae57164afd552 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sun, 7 Jun 2026 00:04:20 +0100
Subject: [PATCH 7/8] Address pelikhan review: require shim.cjs and use core.*
 in apply_samples driver

---
 actions/setup/js/apply_samples.cjs | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs
index c5d3f6924b2..0a3dc7844cf 100644
--- a/actions/setup/js/apply_samples.cjs
+++ b/actions/setup/js/apply_samples.cjs
@@ -1,5 +1,6 @@
 #!/usr/bin/env node
 // @ts-check
+/// <reference types="@actions/github-script" />
 
 // apply_samples.cjs
 //
@@ -25,6 +26,8 @@
 //   GITHUB_WORKSPACE          — git working directory for pre-staging (optional;
 //                               falls back to cwd)
 
+require("./shim.cjs");
+
 const { spawn } = require("child_process");
 const fs = require("fs");
 const path = require("path");
@@ -49,7 +52,7 @@ const PATCH_SIDECAR_TOOLS = new Set(["create_pull_request", "push_to_pull_reques
 function loadSamples() {
   const raw = process.env.GH_AW_SAMPLES;
   if (!raw || !raw.trim()) {
-    console.error("apply_samples: GH_AW_SAMPLES is empty — no samples to replay.");
+    core.warning("apply_samples: GH_AW_SAMPLES is empty — no samples to replay.");
     return [];
   }
   let parsed;
@@ -61,7 +64,7 @@ function loadSamples() {
   // Tolerate a literal JSON `null` payload (older compiler emitted it for
   // workflows with --use-samples but no `samples:` entries). Treat as empty.
   if (parsed === null) {
-    console.error("apply_samples: GH_AW_SAMPLES is null — treating as no samples to replay.");
+    core.warning("apply_samples: GH_AW_SAMPLES is null — treating as no samples to replay.");
     return [];
   }
   if (!Array.isArray(parsed)) {
@@ -132,7 +135,7 @@ function preStagePatch(entry, index, workspace) {
   try {
     runGit(["checkout", DEFAULT_BASE_BRANCH], workspace);
   } catch (err) {
-    console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${getErrorMessage(err)}; staying on current HEAD`);
+    core.warning(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${getErrorMessage(err)}; staying on current HEAD`);
   }
 
   // Create the branch (or check it out if it already exists from a previous sample).
@@ -258,13 +261,13 @@ async function main() {
   });
 
   if (samples.length === 0) {
-    console.error("apply_samples: nothing to replay; exiting cleanly.");
+    core.info("apply_samples: nothing to replay; exiting cleanly.");
     writeSyntheticStdioLog(logPath, 0);
     return;
   }
 
   const serverPath = resolveMcpServerPath();
-  console.error(`apply_samples: spawning MCP server ${serverPath}`);
+  core.info(`apply_samples: spawning MCP server ${serverPath}`);
   const child = spawn(process.execPath, [serverPath], {
     stdio: ["pipe", "pipe", "inherit"],
     env: process.env,
@@ -317,7 +320,7 @@ async function main() {
         const text = result.content && result.content[0] && result.content[0].text;
         failures.push(`sample[${i}] (tool=${sample.tool}): ${text || JSON.stringify(result)}`);
       } else {
-        console.error(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`);
+        core.info(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`);
       }
     }
   } finally {
@@ -348,13 +351,12 @@ async function main() {
   if (failures.length > 0) {
     throw new Error(`apply_samples: ${failures.length} sample(s) failed:\n  - ${failures.join("\n  - ")}`);
   }
-  console.error(`apply_samples: ${samples.length} sample(s) replayed successfully.`);
+  core.info(`apply_samples: ${samples.length} sample(s) replayed successfully.`);
 }
 
 if (require.main === module) {
   main().catch(err => {
-    console.error(err && err.stack ? err.stack : String(err));
-    process.exit(1);
+    core.setFailed(err && err.stack ? err.stack : String(err));
   });
 }
 

From fcba545d94dcad3690df68d6ab0c87cbf0d3c80f Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Sun, 7 Jun 2026 00:10:30 +0100
Subject: [PATCH 8/8] Load shim.cjs in safe_outputs_mcp_server so spawned child
 has global.core

When apply_samples.cjs spawns safe_outputs_mcp_server.cjs as a standalone
Node child process, handlers like create_pull_request.cjs that reference
core.info/warning/debug throw ReferenceError: core is not defined.
The shim is idempotent (guarded by 'if (!global.core)'), so loading it
unconditionally is safe when the module is required from a parent that
already initialized it.
---
 actions/setup/js/safe_outputs_mcp_server.cjs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/actions/setup/js/safe_outputs_mcp_server.cjs b/actions/setup/js/safe_outputs_mcp_server.cjs
index aca4f49cd93..3e1c2c54457 100644
--- a/actions/setup/js/safe_outputs_mcp_server.cjs
+++ b/actions/setup/js/safe_outputs_mcp_server.cjs
@@ -1,4 +1,5 @@
 // @ts-check
+/// <reference types="@actions/github-script" />
 
 // Safe Outputs MCP Server Module
 //
@@ -12,6 +13,11 @@
 //   const server = require("./safe_outputs_mcp_server.cjs");
 //   server.startSafeOutputsServer();
 
+// Load core/context shim so handlers that reference `core.*` (e.g.
+// create_pull_request.cjs) work when this file is spawned directly as a
+// child process (e.g. by apply_samples.cjs) outside the github-script runtime.
+require("./shim.cjs");
+
 const { createServer, registerTool, normalizeTool, start } = require("./mcp_server_core.cjs");
 const { createAppendFunction } = require("./safe_outputs_append.cjs");
 const { createHandlers } = require("./safe_outputs_handlers.cjs");