From 33a91e28852d5d74ad9dbc49c39d7797964ae40e Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sat, 6 Jun 2026 17:31:42 +0100 Subject: [PATCH 1/8] feat(compile): add hidden --use-samples flag for deterministic safe-outputs replay Adds a hidden compile mode that replaces the agentic 'Execute coding agent' step with a deterministic driver that replays declarative `samples` entries through the real safe-outputs MCP server. Makes end-to-end tests deterministic without invoking any LLM. Frontmatter: safe-outputs: create-issue: samples: - title: "..." body: "..." Each entry conforms to the MCP tool inputSchema; recognized sidecar keys (`patch` for create-pull-request and push-to-pull-request-branch) are stripped before validation and consumed by the replay driver for branch + patch pre-staging. Hidden surface: - CLI flag `--use-samples` is hidden from `gh aw compile --help` - JSON schema description marks `samples` as 'Internal hidden feature' Implementation: - Static JSON Schema validation against safe_outputs_tools.json at compile time - Deterministic step ordering (sorted by SafeOutputsConfig struct field name) - New driver actions/setup/js/apply_samples.cjs spawns the real MCP server over stdio, sends one tools/call per sample, writes a synthetic terminal_reason: completed marker so handle_agent_failure recognizes success - Driver pre-stages git branches + patches for create_pull_request and push_to_pull_request_branch samples so the real handler can derive a diff Tests: - 5 unit tests covering validation, sidecar stripping, deterministic ordering, sidecar partitioning - 1 integration test verifying the agent step is replaced - 2 vitest specs driving the real MCP server end-to-end --- actions/setup/js/apply_samples.cjs | 354 ++++++++++ actions/setup/js/apply_samples.test.cjs | 115 +++ cmd/gh-aw/main.go | 4 + pkg/cli/compile_compiler_setup.go | 6 + pkg/cli/compile_config.go | 1 + pkg/parser/schemas/main_workflow_schema.json | 704 +++++++++++++++++++ 
pkg/workflow/compiler_types.go | 12 + pkg/workflow/compiler_validators.go | 1 + pkg/workflow/compiler_yaml_ai_execution.go | 8 + pkg/workflow/safe_outputs_config.go | 56 ++ pkg/workflow/samples_replay.go | 103 +++ pkg/workflow/samples_replay_test.go | 98 +++ pkg/workflow/samples_validation.go | 166 +++++ pkg/workflow/samples_validation_test.go | 170 +++++ pkg/workflow/workflow_builder.go | 1 + 15 files changed, 1799 insertions(+) create mode 100644 actions/setup/js/apply_samples.cjs create mode 100644 actions/setup/js/apply_samples.test.cjs create mode 100644 pkg/workflow/samples_replay.go create mode 100644 pkg/workflow/samples_replay_test.go create mode 100644 pkg/workflow/samples_validation.go create mode 100644 pkg/workflow/samples_validation_test.go diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs new file mode 100644 index 00000000000..73311b28c0e --- /dev/null +++ b/actions/setup/js/apply_samples.cjs @@ -0,0 +1,354 @@ +#!/usr/bin/env node +// @ts-check + +// apply_samples.cjs +// +// Deterministic replay driver for `gh aw compile --use-samples`. +// +// Reads `GH_AW_SAMPLES` (a JSON array of `{tool, arguments, sidecars}` +// entries produced by the compiler), spawns the safe-outputs MCP server +// (`safe_outputs_mcp_server.cjs`) as a child process, sends one JSON-RPC +// `tools/call` per sample over stdio, and writes a synthetic `agent-stdio.log` +// so downstream log-parsing / failure-handling steps continue to work. +// +// For samples whose tool is `create_pull_request` or `push_to_pull_request_branch` +// and whose sidecars include `patch`, the driver pre-stages a branch and commits +// the patch into the workspace BEFORE invoking the MCP tool. This lets the +// real `create_pull_request` MCP handler (which derives a git diff against the +// base branch) produce a meaningful transport payload. 
+// +// Env contract: +// GH_AW_SAMPLES — JSON array of replay entries (required) +// GH_AW_AGENT_STDIO_LOG — path where the synthetic stdio log is written +// GH_AW_SAFE_OUTPUTS_CONFIG_PATH — path to the MCP server's config.json +// GH_AW_SAFE_OUTPUTS — path to the MCP server's outputs.jsonl +// GITHUB_WORKSPACE — git working directory for pre-staging (optional; +// falls back to cwd) + +const { spawn } = require("child_process"); +const fs = require("fs"); +const path = require("path"); +const os = require("os"); + +const DEFAULT_BASE_BRANCH = process.env.GH_AW_CUSTOM_BASE_BRANCH || process.env.GITHUB_BASE_REF || process.env.GITHUB_REF_NAME || "main"; +const PATCH_SIDECAR_TOOLS = new Set(["create_pull_request", "push_to_pull_request_branch"]); + +/** + * @typedef {Object} SampleEntry + * @property {string} tool + * @property {Record} arguments + * @property {Record} [sidecars] + */ + +/** + * Read and parse the GH_AW_SAMPLES env var. Returns an empty array (with a + * warning) when unset or empty so the workflow can still complete cleanly. 
+ * @returns {SampleEntry[]} + */ +function loadSamples() { + const raw = process.env.GH_AW_SAMPLES; + if (!raw || !raw.trim()) { + console.error("apply_samples: GH_AW_SAMPLES is empty — no samples to replay."); + return []; + } + let parsed; + try { + parsed = JSON.parse(raw); + } catch (err) { + throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${/** @type {Error} */ err.message}`); + } + if (!Array.isArray(parsed)) { + throw new Error("apply_samples: GH_AW_SAMPLES must be a JSON array"); + } + for (const [i, entry] of parsed.entries()) { + if (!entry || typeof entry !== "object" || typeof entry.tool !== "string") { + throw new Error(`apply_samples: entry ${i} is missing a string "tool" field`); + } + if (!entry.arguments || typeof entry.arguments !== "object") { + throw new Error(`apply_samples: entry ${i} (tool=${entry.tool}) is missing an "arguments" object`); + } + } + return parsed; +} + +/** + * Run a git subcommand synchronously and return stdout. Throws on non-zero exit. + * @param {string[]} args + * @param {string} cwd + * @returns {string} + */ +function runGit(args, cwd) { + const { spawnSync } = require("child_process"); + const result = spawnSync("git", args, { cwd, encoding: "utf8" }); + if (result.status !== 0) { + throw new Error(`git ${args.join(" ")} failed (exit ${result.status}): ${result.stderr || result.stdout}`); + } + return result.stdout; +} + +/** + * Ensure git user.email / user.name are configured so commits succeed in CI. + * @param {string} cwd + */ +function ensureGitIdentity(cwd) { + try { + runGit(["config", "user.email"], cwd); + } catch { + runGit(["config", "user.email", "gh-aw-samples@github.com"], cwd); + } + try { + runGit(["config", "user.name"], cwd); + } catch { + runGit(["config", "user.name", "gh-aw samples"], cwd); + } +} + +/** + * Pre-stage a branch + patch for samples whose tool reads the workspace diff. + * Mutates `entry.arguments.branch` to the actual checked-out branch. 
+ * @param {SampleEntry} entry + * @param {number} index + * @param {string} workspace + */ +function preStagePatch(entry, index, workspace) { + const patch = entry.sidecars && entry.sidecars.patch; + if (typeof patch !== "string" || !patch.trim()) { + return; + } + const branch = typeof entry.arguments.branch === "string" && entry.arguments.branch.trim() ? entry.arguments.branch.trim() : `gh-aw-sample-${index + 1}`; + entry.arguments.branch = branch; + + ensureGitIdentity(workspace); + + // Start from the base branch so the diff is meaningful. Tolerate the case + // where the base ref doesn't exist locally — fall back to HEAD. + try { + runGit(["checkout", DEFAULT_BASE_BRANCH], workspace); + } catch (err) { + console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${/** @type {Error} */ err.message}; staying on current HEAD`); + } + + // Create the branch (or check it out if it already exists from a previous sample). + try { + runGit(["checkout", "-b", branch], workspace); + } catch { + runGit(["checkout", branch], workspace); + } + + // Write patch to a temp file and apply it. + const tmpPatch = path.join(os.tmpdir(), `gh-aw-sample-${index + 1}.patch`); + fs.writeFileSync(tmpPatch, patch.endsWith("\n") ? patch : patch + "\n"); + try { + runGit(["apply", "--whitespace=nowarn", tmpPatch], workspace); + } catch (err) { + // Fall back to --3way for patches that don't apply cleanly on top of an + // empty working tree (uncommon but possible for synthetic samples). + runGit(["apply", "--3way", "--whitespace=nowarn", tmpPatch], workspace); + } + + runGit(["add", "-A"], workspace); + runGit(["commit", "-m", `gh-aw sample ${index + 1}: ${entry.tool}`, "--allow-empty"], workspace); +} + +/** + * Send a single JSON-RPC request to the MCP server child process and resolve + * with the parsed JSON response (or reject on timeout). 
+ * @param {import("child_process").ChildProcessWithoutNullStreams} child + * @param {NodeJS.WritableStream} stdin + * @param {object} request + * @param {AsyncIterableIterator} responseIterator + * @returns {Promise} + */ +async function sendJsonRpc(child, stdin, request, responseIterator) { + stdin.write(JSON.stringify(request) + "\n"); + const { value, done } = await responseIterator.next(); + if (done) { + throw new Error(`apply_samples: MCP server closed stdout before responding to request id=${request.id}`); + } + return JSON.parse(value); +} + +/** + * Turn the MCP server's stdout into an async iterator of line strings. + * @param {NodeJS.ReadableStream} stdout + */ +async function* lineIterator(stdout) { + let buffer = ""; + for await (const chunk of stdout) { + buffer += chunk.toString(); + let newlineIdx; + while ((newlineIdx = buffer.indexOf("\n")) !== -1) { + const line = buffer.slice(0, newlineIdx).trim(); + buffer = buffer.slice(newlineIdx + 1); + if (line) { + yield line; + } + } + } + if (buffer.trim()) { + yield buffer.trim(); + } +} + +/** + * Locate the safe_outputs_mcp_server.cjs script. The setup action copies it + * into ${RUNNER_TEMP}/gh-aw/actions/ alongside this driver; fall back to + * resolving via __dirname for local-execution / tests. + * @returns {string} + */ +function resolveMcpServerPath() { + const candidates = [ + path.join(__dirname, "safe_outputs_mcp_server.cjs"), + process.env.RUNNER_TEMP ? path.join(process.env.RUNNER_TEMP, "gh-aw", "actions", "safe_outputs_mcp_server.cjs") : null, + process.env.RUNNER_TEMP ? path.join(process.env.RUNNER_TEMP, "gh-aw", "safeoutputs", "safe_outputs_mcp_server.cjs") : null, + ].filter(/** @returns {p is string} */ p => typeof p === "string"); + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return candidate; + } + } + throw new Error(`apply_samples: could not locate safe_outputs_mcp_server.cjs. 
Looked in: ${candidates.join(", ")}`); +} + +/** + * Append a synthetic terminal_reason: completed marker to the engine stdio log + * so downstream parsers / handle_agent_failure recognize the replay as a + * successful agent run. + * @param {string} logPath + * @param {number} sampleCount + */ +function writeSyntheticStdioLog(logPath, sampleCount) { + if (!logPath) return; + try { + fs.mkdirSync(path.dirname(logPath), { recursive: true }); + } catch { + /* ignore */ + } + const lines = [ + `gh-aw samples replay: ${sampleCount} MCP tools/call invocation(s) completed deterministically.`, + JSON.stringify({ + type: "result", + subtype: "success", + terminal_reason: "completed", + num_turns: sampleCount, + driver: "apply_samples", + }), + "", + ]; + fs.appendFileSync(logPath, lines.join("\n")); +} + +async function main() { + const samples = loadSamples(); + const workspace = process.env.GITHUB_WORKSPACE || process.cwd(); + const logPath = process.env.GH_AW_AGENT_STDIO_LOG || ""; + + // Pre-stage branches/patches. + samples.forEach((sample, i) => { + if (PATCH_SIDECAR_TOOLS.has(sample.tool)) { + preStagePatch(sample, i, workspace); + } + }); + + if (samples.length === 0) { + console.error("apply_samples: nothing to replay; exiting cleanly."); + writeSyntheticStdioLog(logPath, 0); + return; + } + + const serverPath = resolveMcpServerPath(); + console.error(`apply_samples: spawning MCP server ${serverPath}`); + const child = spawn(process.execPath, [serverPath], { + stdio: ["pipe", "pipe", "inherit"], + env: process.env, + }); + + const stdoutIter = lineIterator(child.stdout); + let nextId = 1; + const failures = []; + + try { + // Initialize handshake. 
+ const initRsp = await sendJsonRpc( + child, + child.stdin, + { + jsonrpc: "2.0", + id: nextId++, + method: "initialize", + params: { + protocolVersion: "2025-06-18", + capabilities: {}, + clientInfo: { name: "apply_samples", version: "1.0.0" }, + }, + }, + stdoutIter + ); + if (initRsp.error) { + throw new Error(`MCP initialize failed: ${JSON.stringify(initRsp.error)}`); + } + + // Send one tools/call per sample. + for (const [i, sample] of samples.entries()) { + const callRsp = await sendJsonRpc( + child, + child.stdin, + { + jsonrpc: "2.0", + id: nextId++, + method: "tools/call", + params: { name: sample.tool, arguments: sample.arguments }, + }, + stdoutIter + ); + if (callRsp.error) { + failures.push(`sample[${i}] (tool=${sample.tool}): ${JSON.stringify(callRsp.error)}`); + continue; + } + const result = callRsp.result; + if (result && result.isError) { + const text = result.content && result.content[0] && result.content[0].text; + failures.push(`sample[${i}] (tool=${sample.tool}): ${text || JSON.stringify(result)}`); + } else { + console.error(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`); + } + } + } finally { + try { + child.stdin.end(); + } catch { + /* ignore */ + } + // Give the server up to 2s to exit cleanly. + await new Promise(resolve => { + const timer = setTimeout(() => { + try { + child.kill("SIGTERM"); + } catch { + /* ignore */ + } + resolve(undefined); + }, 2000); + child.once("exit", () => { + clearTimeout(timer); + resolve(undefined); + }); + }); + } + + writeSyntheticStdioLog(logPath, samples.length); + + if (failures.length > 0) { + throw new Error(`apply_samples: ${failures.length} sample(s) failed:\n - ${failures.join("\n - ")}`); + } + console.error(`apply_samples: ${samples.length} sample(s) replayed successfully.`); +} + +if (require.main === module) { + main().catch(err => { + console.error(err && err.stack ? 
err.stack : String(err)); + process.exit(1); + }); +} + +module.exports = { main, loadSamples, preStagePatch, resolveMcpServerPath }; diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs new file mode 100644 index 00000000000..a2210c54761 --- /dev/null +++ b/actions/setup/js/apply_samples.test.cjs @@ -0,0 +1,115 @@ +// @ts-check +// +// apply_samples.test.cjs +// +// Smoke test for the deterministic samples replay driver. Spawns the +// driver as a subprocess (so it actually launches the real MCP server) and +// asserts that: +// - the driver exits 0 +// - the MCP server appends the expected JSONL entry to GH_AW_SAFE_OUTPUTS +// - the synthetic agent-stdio log includes a `terminal_reason: completed` marker +// +// Tests intentionally use the simplest safe-output tool (`create_issue`) so we +// do not need to set up a git working tree for patch sidecars. + +import { describe, it, expect, beforeAll } from "vitest"; +import { spawnSync } from "child_process"; +import fs from "fs"; +import path from "path"; +import os from "os"; +import { fileURLToPath } from "url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +const driverPath = path.join(__dirname, "apply_samples.cjs"); + +function makeTempDir(prefix) { + return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); +} + +describe.sequential("apply_samples.cjs", () => { + let tempDir; + let configPath; + let outputsPath; + let logPath; + + beforeAll(() => { + tempDir = makeTempDir("gh-aw-apply-samples-"); + configPath = path.join(tempDir, "config.json"); + outputsPath = path.join(tempDir, "outputs.jsonl"); + logPath = path.join(tempDir, "agent-stdio.log"); + + // Minimal safe-outputs config enabling only the `create_issue` tool. The + // bootstrap loader keys off the snake-case keys present here. 
+ fs.writeFileSync( + configPath, + JSON.stringify({ + create_issue: { max: 1 }, + }) + ); + }); + + it("replays a create_issue sample through the real MCP server and emits a completed marker", () => { + const samples = [ + { + tool: "create_issue", + arguments: { + title: "Deterministic sample issue", + body: "This issue was emitted by the apply_samples driver during a unit test.", + }, + }, + ]; + + const result = spawnSync(process.execPath, [driverPath], { + env: { + ...process.env, + GH_AW_SAMPLES: JSON.stringify(samples), + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: configPath, + GH_AW_SAFE_OUTPUTS: outputsPath, + GH_AW_AGENT_STDIO_LOG: logPath, + }, + encoding: "utf8", + timeout: 15000, + }); + + if (result.status !== 0) { + // Surface stderr so failures are diagnosable in CI. + throw new Error(`driver exited with status ${result.status}\nstderr:\n${result.stderr}\nstdout:\n${result.stdout}`); + } + + expect(fs.existsSync(outputsPath)).toBe(true); + const outputLines = fs + .readFileSync(outputsPath, "utf8") + .split("\n") + .filter(line => line.trim().length > 0); + expect(outputLines.length).toBeGreaterThanOrEqual(1); + + const firstEntry = JSON.parse(outputLines[0]); + expect(firstEntry.type).toBe("create_issue"); + expect(firstEntry.title).toBe("Deterministic sample issue"); + + expect(fs.existsSync(logPath)).toBe(true); + const logText = fs.readFileSync(logPath, "utf8"); + expect(logText).toContain("terminal_reason"); + expect(logText).toContain("completed"); + }); + + it("exits cleanly when GH_AW_SAMPLES is empty", () => { + const result = spawnSync(process.execPath, [driverPath], { + env: { + ...process.env, + GH_AW_SAMPLES: "[]", + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: configPath, + GH_AW_SAFE_OUTPUTS: outputsPath, + GH_AW_AGENT_STDIO_LOG: path.join(tempDir, "empty-log.log"), + }, + encoding: "utf8", + timeout: 10000, + }); + + expect(result.status).toBe(0); + const logText = fs.readFileSync(path.join(tempDir, "empty-log.log"), "utf8"); + 
expect(logText).toContain("terminal_reason"); + }); +}); diff --git a/cmd/gh-aw/main.go b/cmd/gh-aw/main.go index 895fdb2beed..27748d7cee5 100644 --- a/cmd/gh-aw/main.go +++ b/cmd/gh-aw/main.go @@ -304,6 +304,7 @@ Examples: priorManifestFile, _ := cmd.Flags().GetString("prior-manifest-file") ghes, _ := cmd.Flags().GetBool("ghes") verbose, _ := cmd.Flags().GetBool("verbose") + useSamples, _ := cmd.Flags().GetBool("use-samples") if err := validateEngine(engineOverride); err != nil { return err } @@ -364,6 +365,7 @@ Examples: ValidateImages: validateImages, PriorManifestFile: priorManifestFile, GHESCompat: ghes, + UseSamples: useSamples, } if _, err := cli.CompileWorkflows(cmd.Context(), config); err != nil { // Return error as-is without additional formatting @@ -703,6 +705,8 @@ Use "` + string(constants.CLIExtensionPrefix) + ` help all" to show help for all compileCmd.Flags().Bool("strict", false, "Override frontmatter to enforce strict mode validation for all workflows (enforces action pinning, network config, safe-outputs, refuses write permissions and deprecated fields). Note: Workflows default to strict mode unless frontmatter sets strict: false") compileCmd.Flags().Bool("trial", false, "Enable trial mode compilation (modifies workflows for trial execution)") compileCmd.Flags().String("logical-repo", "", "Repository to simulate workflow execution against (for trial mode)") + compileCmd.Flags().Bool("use-samples", false, "Hidden: replace the agentic 'Execute coding agent' step with a deterministic driver that replays the workflow's safe-outputs `samples` frontmatter entries through the safe-outputs MCP server. 
Used to make end-to-end tests deterministic.") + _ = compileCmd.Flags().MarkHidden("use-samples") compileCmd.Flags().Bool("dependabot", false, "Generate dependency manifests (package.json, requirements.txt, go.mod) and Dependabot config when dependencies are detected") compileCmd.Flags().Bool("force", false, "Force overwrite of existing dependency files (e.g., dependabot.yml)") compileCmd.Flags().Bool("refresh-stop-time", false, "Force regeneration of stop-after times instead of preserving existing values from lock files") diff --git a/pkg/cli/compile_compiler_setup.go b/pkg/cli/compile_compiler_setup.go index bdc672aea13..f5853e752ec 100644 --- a/pkg/cli/compile_compiler_setup.go +++ b/pkg/cli/compile_compiler_setup.go @@ -147,6 +147,12 @@ func configureCompilerFlags(compiler *workflow.Compiler, config CompileConfig) { } } + // Replace the agentic step with a deterministic samples replay driver when requested (hidden feature). + if config.UseSamples { + compileCompilerSetupLog.Print("Enabling --use-samples: agentic step will be replaced by a deterministic replay driver") + compiler.SetUseSamples(true) + } + // Set refresh stop time flag compiler.SetRefreshStopTime(config.RefreshStopTime) if config.RefreshStopTime { diff --git a/pkg/cli/compile_config.go b/pkg/cli/compile_config.go index 24206c2d583..901c3ccb27e 100644 --- a/pkg/cli/compile_config.go +++ b/pkg/cli/compile_config.go @@ -13,6 +13,7 @@ type CompileConfig struct { Purge bool // Remove orphaned lock files TrialMode bool // Enable trial mode (suppress safe outputs) TrialLogicalRepoSlug string // Target repository for trial mode + UseSamples bool // Hidden: replace agentic step with a deterministic samples replay driver Strict bool // Enable strict mode validation Dependabot bool // Generate Dependabot manifests for npm dependencies ForceOverwrite bool // Force overwrite of existing files (dependabot.yml) diff --git a/pkg/parser/schemas/main_workflow_schema.json 
b/pkg/parser/schemas/main_workflow_schema.json index 27e69031485..9be825d9a2a 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -4394,6 +4394,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -4984,6 +5000,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -5062,6 +5094,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -5117,6 +5165,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -5258,6 +5322,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -5378,6 +5458,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -5428,6 +5524,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -5558,6 +5670,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -5657,6 +5785,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -5742,6 +5886,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "github-token": { "$ref": "#/$defs/github_token", "description": "GitHub token to use for this specific output type. Overrides global github-token if specified." @@ -5814,6 +5974,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "state-reason": { "type": "string", "enum": ["completed", "not_planned", "duplicate"], @@ -5900,6 +6076,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -5981,6 +6173,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -6097,6 +6305,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false, @@ -6472,6 +6696,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "allow-workflows": { "type": "boolean", "description": "When true, adds workflows: write to the GitHub App token permissions. Required when allowed-files targets .github/workflows/ paths. 
Requires safe-outputs.github-app to be configured because the workflows permission is a GitHub App-only permission and cannot be granted via GITHUB_TOKEN.", @@ -6551,6 +6791,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -6644,6 +6900,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -6715,6 +6987,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -6781,6 +7069,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -6845,6 +7149,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -6884,6 +7204,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -6928,6 +7264,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "github-app": { "$ref": "#/$defs/github_app", "description": "GitHub App credentials for minting an installation access token scoped to checks:write for this handler. When set, a short-lived token is minted before the handler runs and revoked afterwards." @@ -7037,6 +7389,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -7122,6 +7490,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -7231,6 +7615,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -7305,6 +7705,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -7412,6 +7828,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -7486,6 +7918,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -7565,6 +8013,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -7656,6 +8120,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -7730,6 +8210,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -7818,6 +8314,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -8016,6 +8528,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. 
Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "github-token-for-extra-empty-commit": { "type": "string", "description": "Token used to push an empty commit after pushing changes to trigger CI events. Works around the GITHUB_TOKEN limitation where pushes don't trigger workflow runs. Defaults to the magic secret GH_AW_CI_TRIGGER_TOKEN if set in the repository. Use a secret expression (e.g. '${{ secrets.CI_TOKEN }}') for a custom token, or 'app' for GitHub App auth." @@ -8201,6 +8729,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -8275,6 +8819,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -8347,6 +8907,22 @@ "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] + }, "required-labels": { "type": "array", "items": { @@ -8412,6 +8988,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "required": ["workflows"], @@ -8568,6 +9160,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "required": ["workflows"], @@ -8632,6 +9240,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -8694,6 +9318,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -8744,6 +9384,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -8809,6 +9465,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -8961,6 +9633,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false @@ -9654,6 +10342,22 @@ "type": "boolean", "description": "If true, emit step summary messages instead of making GitHub API calls for this specific output type (preview mode)", "examples": [true, false] + }, + "samples": { + "description": "Internal hidden feature. Optional list of declarative sample payloads that exercise this safe-output handler. Used by the hidden `gh aw compile --use-samples` flag to replace the agentic step with a deterministic replay through the safe-outputs MCP server. 
Each entry should conform to the corresponding MCP tool inputSchema; recognized sidecar keys (currently `patch` for create-pull-request and push-to-pull-request-branch) are stripped before schema validation and consumed by the replay driver.", + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + { + "type": "object", + "additionalProperties": true + } + ] } }, "additionalProperties": false diff --git a/pkg/workflow/compiler_types.go b/pkg/workflow/compiler_types.go index 9c0639c4977..4ea0310b547 100644 --- a/pkg/workflow/compiler_types.go +++ b/pkg/workflow/compiler_types.go @@ -70,6 +70,7 @@ type Compiler struct { forceStaged bool // If true, force all safe-outputs into staged mode trialMode bool // If true, suppress safe outputs for trial mode execution trialLogicalRepoSlug string // If set in trial mode, the logical repository to checkout + useSamples bool // If true, replace the agentic step with a deterministic samples replay driver (hidden feature) refreshStopTime bool // If true, regenerate stop-after times instead of preserving existing ones forceRefreshActionPins bool // If true, clear action cache and resolve all actions from GitHub API failFast bool // If true, stop at first validation error instead of collecting all errors @@ -203,6 +204,14 @@ func (c *Compiler) SetTrialLogicalRepoSlug(repo string) { c.trialLogicalRepoSlug = repo } +// SetUseSamples configures whether to replace the agentic step with a +// deterministic replay driver that feeds `samples` entries to the safe-outputs +// MCP server via real `tools/call` JSON-RPC. Hidden feature used by +// `gh aw compile --use-samples`. 
+func (c *Compiler) SetUseSamples(use bool) { + c.useSamples = use +} + // SetStrictMode configures whether to enable strict validation mode func (c *Compiler) SetStrictMode(strict bool) { c.strictMode = strict @@ -446,6 +455,7 @@ type WorkflowData struct { WorkflowID string // workflow identifier derived from markdown filename (basename without extension) TrialMode bool // whether the workflow is running in trial mode TrialLogicalRepo string // target repository slug for trial mode (owner/repo) + UseSamples bool // whether the agentic step should be replaced by a deterministic samples replay driver (hidden feature) FrontmatterName string // name field from frontmatter (for code scanning alert driver default) FrontmatterEmoji string // emoji field from frontmatter (for display in footers and UI) FrontmatterYAML string // raw frontmatter YAML content (rendered as comment in lock file for reference) @@ -627,6 +637,8 @@ type BaseSafeOutputConfig struct { GitHubApp *GitHubAppConfig `yaml:"github-app,omitempty"` // GitHub App credentials for minting a per-handler installation access token Staged bool `yaml:"staged,omitempty"` // If true, emit step summary messages instead of making GitHub API calls for this specific output type NormalizeClosingKeywords *bool `yaml:"normalize-closing-keywords,omitempty"` // When true for this output type, strip backticks from recognized issue-closing keywords in body fields. + // Samples carries deterministic replay samples for the hidden `gh aw compile --use-samples` flag. Each entry is the JSON object passed to the corresponding MCP tool's `tools/call` arguments. Sample-only sidecar fields (e.g. `patch` for create_pull_request) are stripped before the call and used by the replay driver. 
+ Samples []map[string]any `yaml:"samples,omitempty"` } // SafeOutputsConfig holds configuration for automatic output routes diff --git a/pkg/workflow/compiler_validators.go b/pkg/workflow/compiler_validators.go index 316ba6a8b27..d1286dcd4bb 100644 --- a/pkg/workflow/compiler_validators.go +++ b/pkg/workflow/compiler_validators.go @@ -152,6 +152,7 @@ func (c *Compiler) validateCoreToolConfiguration(workflowData *WorkflowData, mar {logMessage: "Validating sandbox configuration", validateFn: func() error { return validateSandboxConfig(workflowData) }}, {logMessage: "Validating safe-outputs target fields", validateFn: func() error { return validateSafeOutputsTarget(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs max fields", validateFn: func() error { return validateSafeOutputsMax(workflowData.SafeOutputs) }}, + {logMessage: "Validating safe-outputs samples entries against MCP tool schemas", validateFn: func() error { return validateSafeOutputsSamples(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs allowed-domains", validateFn: func() error { return c.validateSafeOutputsAllowedDomains(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs merge-pull-request", validateFn: func() error { return validateSafeOutputsMergePullRequest(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs needs declarations", validateFn: func() error { return validateSafeOutputsNeeds(workflowData) }}, diff --git a/pkg/workflow/compiler_yaml_ai_execution.go b/pkg/workflow/compiler_yaml_ai_execution.go index b3b6826b74a..48473d9c883 100644 --- a/pkg/workflow/compiler_yaml_ai_execution.go +++ b/pkg/workflow/compiler_yaml_ai_execution.go @@ -9,6 +9,14 @@ import ( // generateEngineExecutionSteps generates the GitHub Actions steps for executing the AI engine func (c *Compiler) generateEngineExecutionSteps(yaml *strings.Builder, data *WorkflowData, engine CodingAgentEngine, logFile string) { + // --use-samples (hidden) replaces the 
agent step with a deterministic driver + // that replays the workflow's safe-outputs `samples` frontmatter entries + // through the safe-outputs MCP server. The engine is never invoked. + if data.UseSamples { + compilerYamlLog.Printf("Replacing engine execution with samples replay driver: engine=%s", engine.GetID()) + c.generateSamplesReplayStep(yaml, data, logFile) + return + } steps := engine.GetExecutionSteps(data, logFile) compilerYamlLog.Printf("Generating engine execution steps: engine=%s, steps=%d", engine.GetID(), len(steps)) diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go index 58dc548537a..e3ad6fb024e 100644 --- a/pkg/workflow/safe_outputs_config.go +++ b/pkg/workflow/safe_outputs_config.go @@ -9,6 +9,7 @@ import ( "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/sliceutil" "github.com/github/gh-aw/pkg/typeutil" + "go.yaml.in/yaml/v3" ) var safeOutputsConfigLog = logger.New("workflow:safe_outputs_config") @@ -755,6 +756,61 @@ func (c *Compiler) parseBaseSafeOutputConfig(configMap map[string]any, config *B config.Staged = stagedBool } } + + // Parse samples list (hidden feature: deterministic replay samples for --use-samples). + // Accepts either a YAML list of objects, or a single object that is auto-wrapped + // into a one-element list, or a YAML string scalar containing a list (for + // authoring convenience with `|` block scalars in frontmatter). + if samples, exists := configMap["samples"]; exists { + parsed := parseSamplesValue(samples) + if len(parsed) > 0 { + safeOutputsConfigLog.Printf("Parsed %d samples entries", len(parsed)) + config.Samples = parsed + } + } +} + +// parseSamplesValue normalizes a `samples` frontmatter value into a list of +// objects. 
Accepted shapes (most-permissive first): +// - YAML list of mappings: returned as-is +// - single YAML mapping: wrapped into a one-element list +// - YAML string containing a list/mapping (authoring with `|` block scalar): +// parsed as YAML and re-normalized +// +// Any other shape yields no samples; validation and replay then skip the +// handler instead of reporting an error. +func parseSamplesValue(samples any) []map[string]any { + switch v := samples.(type) { + case []any: + out := make([]map[string]any, 0, len(v)) + for _, item := range v { + if m, ok := item.(map[string]any); ok { + out = append(out, m) + } else if mStr, ok := item.(map[string]string); ok { + converted := make(map[string]any, len(mStr)) + for k, s := range mStr { + converted[k] = s + } + out = append(out, converted) + } + } + return out + case map[string]any: + return []map[string]any{v} + case string: + trimmed := strings.TrimSpace(v) + if trimmed == "" { + return nil + } + var nested any + if err := yaml.Unmarshal([]byte(trimmed), &nested); err != nil { + safeOutputsConfigLog.Printf("Failed to parse samples string as YAML: %v", err) + return nil + } + return parseSamplesValue(nested) + default: + return nil + } } // SafeOutputStepConfig holds configuration for building a single safe output step diff --git a/pkg/workflow/samples_replay.go b/pkg/workflow/samples_replay.go new file mode 100644 index 00000000000..c0f49d804a8 --- /dev/null +++ b/pkg/workflow/samples_replay.go @@ -0,0 +1,103 @@ +package workflow + +import ( + "encoding/json" + "fmt" + "sort" + "strings" +) + +// SampleEntry is the per-call payload consumed by apply_samples.cjs. +// Each entry corresponds to a single MCP `tools/call` invocation. +type SampleEntry struct { + // Tool is the snake_case MCP tool name (e.g. "create_pull_request"). + Tool string `json:"tool"` + // Arguments are passed verbatim as the MCP `tools/call` arguments. + // Sample sidecar fields (e.g. `patch`) have already been stripped.
+ Arguments map[string]any `json:"arguments"` + // Sidecars carries fields stripped from Arguments that need out-of-band + // pre-staging by the driver (e.g. `patch` for create_pull_request). + Sidecars map[string]any `json:"sidecars,omitempty"` +} + +// collectSampleEntries walks the safe-outputs config and flattens every +// configured `samples` entry into the order they will be sent to the MCP +// server. Iteration order is deterministic (sorted by struct field name) so +// that compiled YAML is stable across runs. +func collectSampleEntries(config *SafeOutputsConfig) []SampleEntry { + if config == nil { + return nil + } + + fieldNames := make([]string, 0, len(safeOutputFieldMapping)) + for fieldName := range safeOutputFieldMapping { + fieldNames = append(fieldNames, fieldName) + } + sort.Strings(fieldNames) + + var entries []SampleEntry + for _, fieldName := range fieldNames { + toolName := safeOutputFieldMapping[fieldName] + base := extractBaseSafeOutputConfig(config, fieldName) + if base == nil || len(base.Samples) == 0 { + continue + } + sidecarKeys := sampleSidecarFields[toolName] + for _, sample := range base.Samples { + args := make(map[string]any, len(sample)) + var sidecars map[string]any + for k, v := range sample { + if sidecarKeys[k] { + if sidecars == nil { + sidecars = make(map[string]any) + } + sidecars[k] = v + continue + } + args[k] = v + } + entries = append(entries, SampleEntry{ + Tool: toolName, + Arguments: args, + Sidecars: sidecars, + }) + } + } + return entries +} + +// generateSamplesReplayStep emits the YAML that replaces the agentic +// `Execute coding agent` step when the hidden `gh aw compile --use-samples` +// flag is used. It spawns the safe-outputs MCP server over stdio and feeds it +// a `tools/call` for every collected sample, after pre-staging branches/patches +// for samples that carry them. 
+func (c *Compiler) generateSamplesReplayStep(yaml *strings.Builder, data *WorkflowData, logFile string) { + entries := collectSampleEntries(data.SafeOutputs) + compilerYamlLog.Printf("Generating samples replay step: entries=%d", len(entries)) + + // Serialize entries to JSON for the driver. Always emit valid JSON even when + // empty so the driver can produce a clear `no samples configured` message + // rather than crashing on an empty env var. + payload, err := json.Marshal(entries) + if err != nil { + // Should never happen for map[string]any payloads; fall back to empty + // array so the workflow still compiles and the driver reports cleanly. + compilerYamlLog.Printf("Warning: failed to marshal samples entries: %v", err) + payload = []byte("[]") + } + + yaml.WriteString(" - name: Replay safe-outputs samples (deterministic)\n") + yaml.WriteString(" id: agentic_execution\n") + yaml.WriteString(" env:\n") + yaml.WriteString(" GH_AW_SAMPLES: |\n") + for _, line := range strings.Split(string(payload), "\n") { + fmt.Fprintf(yaml, " %s\n", line) + } + fmt.Fprintf(yaml, " GH_AW_AGENT_STDIO_LOG: %s\n", logFile) + yaml.WriteString(" GH_AW_SAFE_OUTPUTS_CONFIG_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/config.json\n") + yaml.WriteString(" GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl\n") + yaml.WriteString(" run: |\n") + yaml.WriteString(" set -euo pipefail\n") + yaml.WriteString(" mkdir -p \"$(dirname \"$GH_AW_AGENT_STDIO_LOG\")\"\n") + yaml.WriteString(" node \"${{ runner.temp }}/gh-aw/actions/apply_samples.cjs\"\n") +} diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go new file mode 100644 index 00000000000..37096b20b00 --- /dev/null +++ b/pkg/workflow/samples_replay_test.go @@ -0,0 +1,98 @@ +//go:build integration + +package workflow + +import ( + "os" + "strings" + "testing" +) + +// TestUseSamplesReplacesAgentStep verifies that compiling with +// SetUseSamples(true) replaces the engine `Execute coding 
agent` step +// with the deterministic `Replay safe-outputs samples` step driven by +// apply_samples.cjs. +func TestUseSamplesReplacesAgentStep(t *testing.T) { + const md = `--- +on: + workflow_dispatch: +permissions: read-all +engine: + id: claude +safe-outputs: + create-issue: + samples: + - title: "Deterministic test issue" + body: "Issue body emitted by gh-aw samples replay." +--- + +Trivial workflow whose only job is to be compiled with --use-samples. +` + + tmpFile, err := os.CreateTemp("", "use-samples-*.md") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpFile.Name()) + if _, err := tmpFile.WriteString(md); err != nil { + t.Fatal(err) + } + tmpFile.Close() + + t.Run("Default Mode", func(t *testing.T) { + compiler := NewCompiler() + if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil { + t.Fatalf("compile failed: %v", err) + } + lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml" + defer os.Remove(lockPath) + b, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("read lock: %v", err) + } + lockContent := string(b) + if strings.Contains(lockContent, "Replay safe-outputs samples") { + t.Error("Did not expect samples replay step in default mode") + } + if strings.Contains(lockContent, "apply_samples.cjs") { + t.Error("Did not expect apply_samples driver in default mode") + } + }) + + t.Run("Use Samples Mode", func(t *testing.T) { + compiler := NewCompiler() + compiler.SetUseSamples(true) + if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil { + t.Fatalf("compile failed: %v", err) + } + workflowData, err := compiler.ParseWorkflowFile(tmpFile.Name()) + if err != nil { + t.Fatalf("ParseWorkflowFile failed: %v", err) + } + if !workflowData.UseSamples { + t.Fatal("Expected workflowData.UseSamples to be true after SetUseSamples(true)") + } + lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml" + defer os.Remove(lockPath) + b, _ := os.ReadFile(lockPath) + lockContent := string(b) + if 
!strings.Contains(lockContent, "Replay safe-outputs samples (deterministic)") { + t.Error("Expected `Replay safe-outputs samples (deterministic)` step in lock file") + } + if !strings.Contains(lockContent, "apply_samples.cjs") { + t.Error("Expected lock file to invoke apply_samples.cjs driver") + } + if !strings.Contains(lockContent, "GH_AW_SAMPLES:") { + t.Error("Expected GH_AW_SAMPLES env var in lock file") + } + if !strings.Contains(lockContent, `"tool":"create_issue"`) { + t.Error("Expected JSON-encoded create_issue tool entry in lock file") + } + if !strings.Contains(lockContent, "Deterministic test issue") { + t.Error("Expected sample title in lock file") + } + if !strings.Contains(lockContent, "id: agentic_execution") { + t.Error("Expected id: agentic_execution on the replay step") + } + }) +} diff --git a/pkg/workflow/samples_validation.go b/pkg/workflow/samples_validation.go new file mode 100644 index 00000000000..79bfbc0f5f0 --- /dev/null +++ b/pkg/workflow/samples_validation.go @@ -0,0 +1,166 @@ +package workflow + +import ( + "encoding/json" + "fmt" + "reflect" + "sort" + "strings" + "sync" + + "github.com/santhosh-tekuri/jsonschema/v6" +) + +// sampleSidecarFields lists fields recognized inside a `samples` entry +// that are NOT passed to the MCP tool's `tools/call` arguments. They are stripped +// from the sample before schema validation and consumed by the replay driver +// (e.g. to pre-stage a branch + patch on disk). +var sampleSidecarFields = map[string]map[string]bool{ + "create_pull_request": { + "patch": true, + }, + "push_to_pull_request_branch": { + "patch": true, + }, +} + +// compiledToolSchemas caches the per-tool jsonschema.Schema parsed from the +// embedded safe_outputs_tools.json. Compiled lazily on first use. 
+var ( + compiledToolSchemasOnce sync.Once + compiledToolSchemas map[string]*jsonschema.Schema + compiledToolSchemasErr error +) + +func getCompiledToolSchemas() (map[string]*jsonschema.Schema, error) { + compiledToolSchemasOnce.Do(func() { + var tools []struct { + Name string `json:"name"` + InputSchema json.RawMessage `json:"inputSchema"` + } + if err := json.Unmarshal([]byte(safeOutputsToolsJSONContent), &tools); err != nil { + compiledToolSchemasErr = fmt.Errorf("failed to parse safe_outputs_tools.json for samples validation: %w", err) + return + } + out := make(map[string]*jsonschema.Schema, len(tools)) + for _, t := range tools { + if len(t.InputSchema) == 0 { + continue + } + var schemaDoc any + if err := json.Unmarshal(t.InputSchema, &schemaDoc); err != nil { + compiledToolSchemasErr = fmt.Errorf("failed to parse inputSchema for tool %q: %w", t.Name, err) + return + } + compiler := jsonschema.NewCompiler() + schemaURL := fmt.Sprintf("inmem://safe-outputs-tools/%s.json", t.Name) + if err := compiler.AddResource(schemaURL, schemaDoc); err != nil { + compiledToolSchemasErr = fmt.Errorf("failed to add schema resource for tool %q: %w", t.Name, err) + return + } + schema, err := compiler.Compile(schemaURL) + if err != nil { + compiledToolSchemasErr = fmt.Errorf("failed to compile inputSchema for tool %q: %w", t.Name, err) + return + } + out[t.Name] = schema + } + compiledToolSchemas = out + }) + return compiledToolSchemas, compiledToolSchemasErr +} + +// validateSafeOutputsSamples validates every `samples` entry on every +// enabled safe-output handler against the corresponding MCP tool's inputSchema. +// Sample sidecar fields (e.g. `patch`) are stripped before validation. Returns +// the first error encountered; iteration order is deterministic (sorted by +// struct field name) so error messages are stable. 
+func validateSafeOutputsSamples(config *SafeOutputsConfig) error { + if config == nil { + return nil + } + + fieldNames := make([]string, 0, len(safeOutputFieldMapping)) + for fieldName := range safeOutputFieldMapping { + fieldNames = append(fieldNames, fieldName) + } + sort.Strings(fieldNames) + + for _, fieldName := range fieldNames { + toolName := safeOutputFieldMapping[fieldName] + base := extractBaseSafeOutputConfig(config, fieldName) + if base == nil || len(base.Samples) == 0 { + continue + } + if err := validateSamplesForTool(toolName, base.Samples); err != nil { + return err + } + } + return nil +} + +// extractBaseSafeOutputConfig returns the embedded BaseSafeOutputConfig of the +// non-nil safe-output config at SafeOutputsConfig.<fieldName>, or nil if the +// field is unset or the struct does not embed BaseSafeOutputConfig. +func extractBaseSafeOutputConfig(config *SafeOutputsConfig, fieldName string) *BaseSafeOutputConfig { + field, ok := safeOutputPointerFieldValue(config, fieldName) + if !ok || field.IsNil() { + return nil + } + elem := field.Elem() + if elem.Kind() != reflect.Struct { + return nil + } + baseField := elem.FieldByName("BaseSafeOutputConfig") + if !baseField.IsValid() || !baseField.CanAddr() { + return nil + } + if base, ok := baseField.Addr().Interface().(*BaseSafeOutputConfig); ok { + return base + } + return nil +} + +// validateSamplesForTool validates each sample against the named MCP tool's +// inputSchema after stripping recognized sidecar fields. +func validateSamplesForTool(toolName string, samples []map[string]any) error { + schemas, err := getCompiledToolSchemas() + if err != nil { + return err + } + schema, found := schemas[toolName] + if !found { + return fmt.Errorf("samples: no MCP tool schema found for %q (yaml key %q).
Available tools come from pkg/workflow/js/safe_outputs_tools.json", toolName, toolDisplayKey(toolName)) + } + displayKey := toolDisplayKey(toolName) + sidecars := sampleSidecarFields[toolName] + for i, sample := range samples { + stripped := stripSidecarFields(sample, sidecars) + if err := schema.Validate(stripped); err != nil { + return fmt.Errorf("safe-outputs.%s.samples[%d]: %w", displayKey, i, err) + } + } + return nil +} + +// stripSidecarFields returns a shallow copy of sample with sidecar keys removed. +// The original map is not modified. +func stripSidecarFields(sample map[string]any, sidecars map[string]bool) map[string]any { + if len(sidecars) == 0 { + return sample + } + out := make(map[string]any, len(sample)) + for k, v := range sample { + if sidecars[k] { + continue + } + out[k] = v + } + return out +} + +// toolDisplayKey converts a snake_case MCP tool name into the hyphenated YAML +// frontmatter key (e.g. "create_pull_request" -> "create-pull-request"). +func toolDisplayKey(toolName string) string { + return strings.ReplaceAll(toolName, "_", "-") +} diff --git a/pkg/workflow/samples_validation_test.go b/pkg/workflow/samples_validation_test.go new file mode 100644 index 00000000000..394d35d665d --- /dev/null +++ b/pkg/workflow/samples_validation_test.go @@ -0,0 +1,170 @@ +package workflow + +import ( + "strings" + "testing" +) + +// TestValidateSafeOutputsSamples_Valid covers the happy path for the +// strict schema validation of samples entries. We use create_issue (no +// sidecars, just title/body) and create_pull_request (with the `patch` sidecar +// that must be stripped before validation). 
+func TestValidateSafeOutputsSamples_Valid(t *testing.T) { + cfg := &SafeOutputsConfig{ + CreateIssues: &CreateIssuesConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + { + "title": "Sample issue", + "body": "Sample body", + }, + }, + }, + }, + CreatePullRequests: &CreatePullRequestsConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + { + "title": "Sample PR", + "body": "Sample PR body", + "branch": "gh-aw-sample-pr", + // patch is a sidecar — must be stripped before validation + // and must NOT cause an `additionalProperties` failure. + "patch": "diff --git a/foo b/foo\nnew file mode 100644\n--- /dev/null\n+++ b/foo\n@@ -0,0 +1 @@\n+hi\n", + }, + }, + }, + }, + } + if err := validateSafeOutputsSamples(cfg); err != nil { + t.Fatalf("expected no validation error, got: %v", err) + } +} + +// TestValidateSafeOutputsSamples_MissingRequired verifies that omitting a +// required field (title) surfaces a stable, parseable error. +func TestValidateSafeOutputsSamples_MissingRequired(t *testing.T) { + cfg := &SafeOutputsConfig{ + CreateIssues: &CreateIssuesConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + { + // title intentionally missing + "body": "Body without title", + }, + }, + }, + }, + } + err := validateSafeOutputsSamples(cfg) + if err == nil { + t.Fatal("expected validation error for missing title, got nil") + } + msg := err.Error() + if !strings.Contains(msg, "create-issue") { + t.Errorf("expected error to reference the YAML key `create-issue`, got: %s", msg) + } + if !strings.Contains(msg, "samples[0]") { + t.Errorf("expected error to reference `samples[0]`, got: %s", msg) + } +} + +// TestValidateSafeOutputsSamples_SidecarStripped verifies that the `patch` +// sidecar is stripped before validation, so a create_pull_request sample with +// only the schema-required fields PLUS a patch validates cleanly. 
+func TestValidateSafeOutputsSamples_SidecarStripped(t *testing.T) { + cfg := &SafeOutputsConfig{ + CreatePullRequests: &CreatePullRequestsConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + { + "title": "PR", + "body": "PR body", + "branch": "gh-aw-x", + "patch": "diff --git a/x b/x\n", + }, + }, + }, + }, + } + if err := validateSafeOutputsSamples(cfg); err != nil { + t.Fatalf("expected sidecar to be stripped and validation to pass, got: %v", err) + } +} + +// TestCollectSampleEntries_DeterministicOrdering verifies that entries are +// emitted in a stable order across runs (sorted by SafeOutputsConfig field name) +// so that compiled YAML is deterministic. +func TestCollectSampleEntries_DeterministicOrdering(t *testing.T) { + cfg := &SafeOutputsConfig{ + CreateIssues: &CreateIssuesConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + {"title": "A", "body": "A"}, + }, + }, + }, + AddComments: &AddCommentsConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + {"body": "comment-A"}, + }, + }, + }, + } + first := collectSampleEntries(cfg) + second := collectSampleEntries(cfg) + + if len(first) != 2 { + t.Fatalf("expected 2 entries, got %d", len(first)) + } + if first[0].Tool != second[0].Tool || first[1].Tool != second[1].Tool { + t.Errorf("expected deterministic ordering across runs, got first=%v second=%v", first, second) + } + // Sorted by struct field name: AddComments < CreateIssues. + if first[0].Tool != "add_comment" { + t.Errorf("expected first entry tool to be add_comment (alphabetical struct field order), got %q", first[0].Tool) + } + if first[1].Tool != "create_issue" { + t.Errorf("expected second entry tool to be create_issue, got %q", first[1].Tool) + } +} + +// TestCollectSampleEntries_SidecarPartitioning verifies that sidecar fields +// land in Sidecars (not Arguments) so the driver knows what to pre-stage. 
+func TestCollectSampleEntries_SidecarPartitioning(t *testing.T) { + cfg := &SafeOutputsConfig{ + CreatePullRequests: &CreatePullRequestsConfig{ + BaseSafeOutputConfig: BaseSafeOutputConfig{ + Samples: []map[string]any{ + { + "title": "PR", + "body": "Body", + "branch": "br", + "patch": "diff --git a/x b/x\n", + }, + }, + }, + }, + } + entries := collectSampleEntries(cfg) + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + e := entries[0] + if e.Tool != "create_pull_request" { + t.Errorf("expected tool create_pull_request, got %q", e.Tool) + } + if _, hasPatchInArgs := e.Arguments["patch"]; hasPatchInArgs { + t.Error("expected patch to be stripped from Arguments") + } + if e.Arguments["title"] != "PR" || e.Arguments["body"] != "Body" || e.Arguments["branch"] != "br" { + t.Errorf("expected title/body/branch to remain in Arguments, got %#v", e.Arguments) + } + if e.Sidecars == nil { + t.Fatal("expected Sidecars to be non-nil") + } + if patch, ok := e.Sidecars["patch"].(string); !ok || !strings.HasPrefix(patch, "diff --git") { + t.Errorf("expected patch to be present in Sidecars as a git diff string, got %#v", e.Sidecars["patch"]) + } +} diff --git a/pkg/workflow/workflow_builder.go b/pkg/workflow/workflow_builder.go index 762de84d3ea..2a87ff848a3 100644 --- a/pkg/workflow/workflow_builder.go +++ b/pkg/workflow/workflow_builder.go @@ -67,6 +67,7 @@ func (c *Compiler) buildInitialWorkflowData( ToolsStartupTimeout: toolsResult.toolsStartupTimeout, TrialMode: c.trialMode, TrialLogicalRepo: c.trialLogicalRepoSlug, + UseSamples: c.useSamples, StrictMode: c.strictMode, AllowActionRefs: c.allowActionRefs, ValidateAWFConfig: !c.skipValidation, From 310d9b8aaa34448a269310e145285bbdb8c0e0b8 Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sat, 6 Jun 2026 17:43:18 +0100 Subject: [PATCH 2/8] feat(compile): force-disable threat-detection under --use-samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 
deterministic samples replay driver emits synthetic safe-outputs purely to exercise downstream handlers in end-to-end tests. Running the LLM-backed threat-detection job against those fabricated payloads defeats determinism, costs tokens, and can spuriously flag the test fixtures. When --use-samples is set, extractSafeOutputsConfig now nils out SafeOutputsConfig.ThreatDetection unconditionally — overriding both the implicit default and any explicit threat-detection: true. The override is logged. Tests: - new TestExtractSafeOutputsConfig_UseSamplesDisablesThreatDetection covers default mode (detection enabled), --use-samples + default (disabled), and --use-samples + explicit true (still disabled) - TestUseSamplesReplacesAgentStep additionally asserts no detection: job appears in the compiled lock file --- pkg/workflow/safe_outputs_config.go | 8 +++ pkg/workflow/samples_replay_test.go | 5 ++ pkg/workflow/samples_threat_detection_test.go | 63 +++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 pkg/workflow/samples_threat_detection_test.go diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go index e3ad6fb024e..1a7541c8eef 100644 --- a/pkg/workflow/safe_outputs_config.go +++ b/pkg/workflow/safe_outputs_config.go @@ -694,6 +694,14 @@ func (c *Compiler) extractSafeOutputsConfig(frontmatter map[string]any) *SafeOut } } + // Force-disable threat detection when --use-samples is active: the replay driver + // emits synthetic outputs solely for deterministic end-to-end tests, and running + // an LLM-backed detection pass would defeat that determinism. 
+ if config != nil && c.useSamples && config.ThreatDetection != nil { + safeOutputsConfigLog.Print("Disabling threat-detection because --use-samples is set") + config.ThreatDetection = nil + } + if config != nil { safeOutputsConfigLog.Print("Successfully extracted safe-outputs configuration") } else { diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go index 37096b20b00..b603ce1fd99 100644 --- a/pkg/workflow/samples_replay_test.go +++ b/pkg/workflow/samples_replay_test.go @@ -94,5 +94,10 @@ Trivial workflow whose only job is to be compiled with --use-samples. if !strings.Contains(lockContent, "id: agentic_execution") { t.Error("Expected id: agentic_execution on the replay step") } + // Threat detection must be force-disabled under --use-samples so the + // deterministic replay isn't perturbed by an LLM-backed detection job. + if strings.Contains(lockContent, "\n detection:\n") { + t.Error("Expected no `detection:` job under --use-samples") + } }) } diff --git a/pkg/workflow/samples_threat_detection_test.go b/pkg/workflow/samples_threat_detection_test.go new file mode 100644 index 00000000000..ba7c082d2be --- /dev/null +++ b/pkg/workflow/samples_threat_detection_test.go @@ -0,0 +1,63 @@ +package workflow + +import "testing" + +// TestExtractSafeOutputsConfig_UseSamplesDisablesThreatDetection verifies +// that --use-samples force-disables threat detection so the deterministic +// replay isn't perturbed by an LLM-backed detection job. 
+func TestExtractSafeOutputsConfig_UseSamplesDisablesThreatDetection(t *testing.T) { + frontmatter := map[string]any{ + "safe-outputs": map[string]any{ + "create-issue": map[string]any{ + "samples": []any{ + map[string]any{"title": "x", "body": "y"}, + }, + }, + }, + } + + t.Run("default mode applies threat-detection", func(t *testing.T) { + c := NewCompiler() + cfg := c.extractSafeOutputsConfig(frontmatter) + if cfg == nil { + t.Fatal("expected non-nil SafeOutputsConfig") + } + if cfg.ThreatDetection == nil { + t.Fatal("expected default threat-detection to be applied in default mode") + } + }) + + t.Run("use-samples disables threat-detection (default)", func(t *testing.T) { + c := NewCompiler() + c.SetUseSamples(true) + cfg := c.extractSafeOutputsConfig(frontmatter) + if cfg == nil { + t.Fatal("expected non-nil SafeOutputsConfig") + } + if cfg.ThreatDetection != nil { + t.Fatal("expected threat-detection to be force-disabled under --use-samples") + } + }) + + t.Run("use-samples disables threat-detection (explicit true)", func(t *testing.T) { + fm := map[string]any{ + "safe-outputs": map[string]any{ + "threat-detection": true, + "create-issue": map[string]any{ + "samples": []any{ + map[string]any{"title": "x", "body": "y"}, + }, + }, + }, + } + c := NewCompiler() + c.SetUseSamples(true) + cfg := c.extractSafeOutputsConfig(fm) + if cfg == nil { + t.Fatal("expected non-nil SafeOutputsConfig") + } + if cfg.ThreatDetection != nil { + t.Fatal("expected explicit threat-detection: true to be force-disabled under --use-samples") + } + }) +} From 9284a627c7d97f47933bab183c182d190e7e215e Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sat, 6 Jun 2026 18:01:53 +0100 Subject: [PATCH 3/8] test(samples): cover preStagePatch end-to-end for create_pull_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds three vitest specs that drive the apply_samples driver's preStagePatch path against a real, throwaway git working tree: 1. 
create_pull_request with a 'patch' sidecar checks out the requested branch, applies the diff, and commits it — and the resulting diff is visible via 'git diff main...<branch>', which is precisely what the downstream MCP create_pull_request handler reads when generating its bundle/patch payload. 2. push_to_pull_request_branch without an explicit 'branch' falls back to 'gh-aw-sample-<index>' and still applies the patch. 3. preStagePatch is a no-op when called with a tool that has no patch sidecar (defense in depth around the PATCH_SIDECAR_TOOLS gate in main()). Together with the existing Go unit tests for sidecar partitioning and schema-stripping, this closes the testing gap around the patch-sidecar flow that was previously only covered structurally. --- actions/setup/js/apply_samples.test.cjs | 143 ++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs index a2210c54761..1546b8435de 100644 --- a/actions/setup/js/apply_samples.test.cjs +++ b/actions/setup/js/apply_samples.test.cjs @@ -14,6 +14,7 @@ import { describe, it, expect, beforeAll } from "vitest"; import { spawnSync } from "child_process"; +import { createRequire } from "module"; import fs from "fs"; import path from "path"; import os from "os"; @@ -23,11 +24,29 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const driverPath = path.join(__dirname, "apply_samples.cjs"); +const require = createRequire(import.meta.url); function makeTempDir(prefix) { return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); } +function git(args, cwd) { + const r = spawnSync("git", args, { cwd, encoding: "utf8" }); + if (r.status !== 0) { + throw new Error(`git ${args.join(" ")} failed: ${r.stderr || r.stdout}`); + } + return r.stdout; +} + +function initRepo(dir, defaultBranch) { + git(["init", "-q", "-b", defaultBranch], dir); + git(["config", "user.email", "ghaw-test@example.com"], dir); +
git(["config", "user.name", "ghaw test"], dir); + fs.writeFileSync(path.join(dir, "README.md"), "# seed\n"); + git(["add", "."], dir); + git(["commit", "-q", "-m", "seed"], dir); +} + describe.sequential("apply_samples.cjs", () => { let tempDir; let configPath; @@ -113,3 +132,127 @@ describe.sequential("apply_samples.cjs", () => { expect(logText).toContain("terminal_reason"); }); }); + +describe("apply_samples.cjs preStagePatch (create_pull_request / push_to_pull_request_branch)", () => { + // Load the module under test directly so we can drive preStagePatch in + // isolation against a real, throwaway git working tree. This is the + // critical code path that turns a `patch` sidecar on a sample entry into + // a real branch + commit that the downstream MCP `create_pull_request` + // handler (which derives a git diff) can act on. + const { preStagePatch } = require("./apply_samples.cjs"); + + /** + * Build a unified diff that adds a brand-new file. Synthetic but realistic. + */ + function newFileDiff(filePath, contents) { + const lines = contents.split("\n"); + // Strip trailing empty element produced by a terminating "\n" so the + // hunk header line count matches what git apply expects. 
+ if (lines[lines.length - 1] === "") lines.pop(); + const body = lines.map(l => "+" + l).join("\n"); + return `diff --git a/${filePath} b/${filePath}\n` + `new file mode 100644\n` + `index 0000000..1111111\n` + `--- /dev/null\n` + `+++ b/${filePath}\n` + `@@ -0,0 +1,${lines.length} @@\n` + body + "\n"; + } + + it("checks out the requested branch and commits the patch on it (create_pull_request)", () => { + const workspace = makeTempDir("gh-aw-prestage-cpr-"); + initRepo(workspace, "main"); + + const branchName = "feat/gh-aw-sample-branch"; + const fileToAdd = "sample-feature.txt"; + const fileBody = "hello from a deterministic sample\nsecond line\n"; + const entry = { + tool: "create_pull_request", + arguments: { + title: "Sample PR", + body: "Sample PR body", + branch: branchName, + }, + sidecars: { patch: newFileDiff(fileToAdd, fileBody) }, + }; + + // GH_AW_CUSTOM_BASE_BRANCH steers preStagePatch to check out the right + // base ref inside our fresh repo (default is GITHUB_BASE_REF / "main"). + const prev = process.env.GH_AW_CUSTOM_BASE_BRANCH; + process.env.GH_AW_CUSTOM_BASE_BRANCH = "main"; + try { + preStagePatch(entry, 0, workspace); + } finally { + if (prev === undefined) delete process.env.GH_AW_CUSTOM_BASE_BRANCH; + else process.env.GH_AW_CUSTOM_BASE_BRANCH = prev; + } + + // 1. Branch name on the entry is preserved (driver must forward it to MCP). + expect(entry.arguments.branch).toBe(branchName); + + // 2. The named branch exists in the working repo. + const branches = git(["branch", "--list", branchName], workspace).trim(); + expect(branches).toContain(branchName); + + // 3. Current HEAD is that branch. + const head = git(["rev-parse", "--abbrev-ref", "HEAD"], workspace).trim(); + expect(head).toBe(branchName); + + // 4. The patch was applied AND committed (not just sitting in the worktree). 
+ const status = git(["status", "--porcelain"], workspace).trim(); + expect(status).toBe(""); + expect(fs.existsSync(path.join(workspace, fileToAdd))).toBe(true); + expect(fs.readFileSync(path.join(workspace, fileToAdd), "utf8")).toBe(fileBody); + + // 5. The commit message identifies the sample so failures are diagnosable. + const lastMsg = git(["log", "-1", "--pretty=%s"], workspace).trim(); + expect(lastMsg).toMatch(/gh-aw sample 1: create_pull_request/); + + // 6. The new file shows up as a real diff against the base branch — this is + // precisely what the downstream MCP create_pull_request handler will read. + const diff = git(["diff", "main..." + branchName, "--", fileToAdd], workspace); + expect(diff).toContain("+hello from a deterministic sample"); + }); + + it("defaults the branch name to gh-aw-sample- when none is supplied", () => { + const workspace = makeTempDir("gh-aw-prestage-default-"); + initRepo(workspace, "main"); + + const entry = { + tool: "push_to_pull_request_branch", + arguments: { + body: "Sample push body", + // branch intentionally omitted — driver should synthesize one. + }, + sidecars: { patch: newFileDiff("push-feature.txt", "from push sample\n") }, + }; + + const prev = process.env.GH_AW_CUSTOM_BASE_BRANCH; + process.env.GH_AW_CUSTOM_BASE_BRANCH = "main"; + try { + preStagePatch(entry, 2, workspace); + } finally { + if (prev === undefined) delete process.env.GH_AW_CUSTOM_BASE_BRANCH; + else process.env.GH_AW_CUSTOM_BASE_BRANCH = prev; + } + + // Index in preStagePatch is zero-based; the default uses i+1 → "gh-aw-sample-3". 
+ expect(entry.arguments.branch).toBe("gh-aw-sample-3"); + const head = git(["rev-parse", "--abbrev-ref", "HEAD"], workspace).trim(); + expect(head).toBe("gh-aw-sample-3"); + expect(fs.existsSync(path.join(workspace, "push-feature.txt"))).toBe(true); + }); + + it("is a no-op when the sample tool isn't in the patch-sidecar set", () => { + // We assert this at the driver level (PATCH_SIDECAR_TOOLS gate in main()), + // but preStagePatch itself should also be a no-op when called with an + // entry that has no patch sidecar — protecting against misuse. + const workspace = makeTempDir("gh-aw-prestage-noop-"); + initRepo(workspace, "main"); + + const entry = { + tool: "create_issue", + arguments: { title: "x", body: "y" }, + }; + preStagePatch(entry, 0, workspace); + + // Still on main, no extra commits, no new files. + expect(git(["rev-parse", "--abbrev-ref", "HEAD"], workspace).trim()).toBe("main"); + const log = git(["log", "--pretty=%s"], workspace).trim().split("\n"); + expect(log).toEqual(["seed"]); + }); +}); From e262eb36292310d14763acbf51fb7b18b9baf774 Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sat, 6 Jun 2026 18:04:24 +0100 Subject: [PATCH 4/8] test(samples): e2e smoke test for create-pull-request + patch sidecar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiles a workflow whose only safe-output is `create-pull-request` with a samples entry carrying a multi-line `patch:` block scalar, then inspects the generated lock.yml. 
Extracts the GH_AW_SAMPLES JSON literal block out of the compiled YAML and asserts: - the agentic step is replaced by the replay step - the entry tool is "create_pull_request" - the patch is partitioned into sidecars, NOT arguments — the MCP create_pull_request handler must not receive a literal patch argument; it derives the diff from the working tree - title/body/branch are preserved in arguments - the patch payload (including the diff header and the added line) survives YAML emission verbatim so the driver can git-apply it - no detection: job is emitted This closes the loop from frontmatter -> compiled YAML for the patch-sidecar flow, complementing the existing Go unit tests (sidecar partitioning) and the vitest preStagePatch specs (which exercise the runtime side against a real git repo). --- pkg/workflow/samples_replay_test.go | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go index b603ce1fd99..609fd58be45 100644 --- a/pkg/workflow/samples_replay_test.go +++ b/pkg/workflow/samples_replay_test.go @@ -3,6 +3,7 @@ package workflow import ( + "encoding/json" "os" "strings" "testing" @@ -101,3 +102,175 @@ Trivial workflow whose only job is to be compiled with --use-samples. } }) } + +// TestUseSamplesCreatePullRequestWithPatch is the end-to-end smoke test for +// the create-pull-request + patch sidecar flow. It compiles a workflow whose +// only safe-output is `create-pull-request` with a `samples` entry carrying +// a `patch` sidecar, then inspects the generated lock.yml to verify that: +// +// 1. The agentic step is replaced by the deterministic replay step +// 2. GH_AW_SAMPLES contains a JSON-encoded create_pull_request entry +// 3. The patch is partitioned into `sidecars`, NOT into `arguments` +// (the MCP server's create_pull_request handler must NOT receive `patch` +// as a tool argument — it derives the diff from the working tree) +// 4. 
The branch name and other PR fields land in `arguments` +// 5. The actual diff payload is preserved verbatim in the lock file +// (so the driver can `git apply` it at replay time) +// 6. No `detection:` job is emitted +func TestUseSamplesCreatePullRequestWithPatch(t *testing.T) { + const patch = "diff --git a/sample.txt b/sample.txt\nnew file mode 100644\nindex 0000000..1111111\n--- /dev/null\n+++ b/sample.txt\n@@ -0,0 +1 @@\n+hello from gh-aw samples\n" + + md := `--- +on: + workflow_dispatch: +permissions: read-all +engine: + id: claude +safe-outputs: + create-pull-request: + samples: + - title: "Sample PR from gh-aw" + body: "PR body emitted by samples replay." + branch: "feat/gh-aw-sample-pr" + patch: | +` + indentBlock(patch, " ") + `--- + +Trivial workflow exercising create-pull-request via --use-samples. +` + + tmpFile, err := os.CreateTemp("", "use-samples-cpr-*.md") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpFile.Name()) + if _, err := tmpFile.WriteString(md); err != nil { + t.Fatal(err) + } + tmpFile.Close() + + compiler := NewCompiler() + compiler.SetUseSamples(true) + if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil { + t.Fatalf("compile failed: %v", err) + } + lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml" + defer os.Remove(lockPath) + b, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("read lock: %v", err) + } + lock := string(b) + + // 1. Agentic step replaced + if !strings.Contains(lock, "Replay safe-outputs samples (deterministic)") { + t.Error("Expected `Replay safe-outputs samples (deterministic)` step in lock file") + } + if !strings.Contains(lock, "apply_samples.cjs") { + t.Error("Expected lock file to invoke apply_samples.cjs driver") + } + + // 2. 
GH_AW_SAMPLES contains a create_pull_request entry + if !strings.Contains(lock, "GH_AW_SAMPLES:") { + t.Fatal("Expected GH_AW_SAMPLES env var in lock file") + } + if !strings.Contains(lock, `"tool":"create_pull_request"`) { + t.Error("Expected JSON-encoded create_pull_request tool entry in lock file") + } + + // Extract the GH_AW_SAMPLES JSON block from the YAML for structural assertions. + samplesJSON := extractGHAWSamplesJSON(t, lock) + var entries []map[string]any + if err := json.Unmarshal([]byte(samplesJSON), &entries); err != nil { + t.Fatalf("failed to parse GH_AW_SAMPLES JSON: %v\nRaw:\n%s", err, samplesJSON) + } + if len(entries) != 1 { + t.Fatalf("expected exactly one sample entry, got %d", len(entries)) + } + entry := entries[0] + + // 3. Patch is in sidecars, NOT in arguments + args, _ := entry["arguments"].(map[string]any) + sidecars, _ := entry["sidecars"].(map[string]any) + if args == nil { + t.Fatal("expected entry.arguments to be an object") + } + if _, hasPatchInArgs := args["patch"]; hasPatchInArgs { + t.Error("patch must be stripped from arguments — MCP create_pull_request handler must not receive it") + } + if sidecars == nil { + t.Fatal("expected entry.sidecars to be present (patch should land here)") + } + gotPatch, _ := sidecars["patch"].(string) + if gotPatch == "" { + t.Fatal("expected sidecars.patch to be a non-empty string") + } + + // 4. PR fields preserved in arguments + if args["title"] != "Sample PR from gh-aw" { + t.Errorf("arguments.title = %q, want %q", args["title"], "Sample PR from gh-aw") + } + if args["body"] != "PR body emitted by samples replay." { + t.Errorf("arguments.body = %q, want %q", args["body"], "PR body emitted by samples replay.") + } + if args["branch"] != "feat/gh-aw-sample-pr" { + t.Errorf("arguments.branch = %q, want %q", args["branch"], "feat/gh-aw-sample-pr") + } + + // 5. 
Patch payload preserved verbatim + if !strings.Contains(gotPatch, "diff --git a/sample.txt b/sample.txt") { + t.Errorf("sidecars.patch missing diff header; got: %q", gotPatch) + } + if !strings.Contains(gotPatch, "+hello from gh-aw samples") { + t.Errorf("sidecars.patch missing payload line; got: %q", gotPatch) + } + + // 6. No detection job + if strings.Contains(lock, "\n detection:\n") { + t.Error("Expected no `detection:` job under --use-samples") + } +} + +// indentBlock prefixes every line of s with prefix. Used to embed a multi-line +// patch under a YAML block scalar in the test fixture. +func indentBlock(s, prefix string) string { + lines := strings.Split(strings.TrimRight(s, "\n"), "\n") + for i, line := range lines { + lines[i] = prefix + line + } + return strings.Join(lines, "\n") + "\n" +} + +// extractGHAWSamplesJSON pulls the literal block scalar value of GH_AW_SAMPLES +// out of the compiled YAML and returns the unindented JSON text. This avoids +// pulling in a full YAML parser for what is a tightly-controlled emit format. +func extractGHAWSamplesJSON(t *testing.T, lock string) string { + t.Helper() + const marker = "GH_AW_SAMPLES: |\n" + start := strings.Index(lock, marker) + if start < 0 { + t.Fatalf("could not find %q in lock file", marker) + } + start += len(marker) + // Determine indentation from the first content line. + rest := lock[start:] + firstNL := strings.Index(rest, "\n") + if firstNL < 0 { + t.Fatal("malformed GH_AW_SAMPLES block: no newline after first line") + } + firstLine := rest[:firstNL] + indent := firstLine[:len(firstLine)-len(strings.TrimLeft(firstLine, " "))] + if indent == "" { + t.Fatal("malformed GH_AW_SAMPLES block: expected indented content") + } + // Collect lines until we hit one that no longer starts with the same indent + // (i.e. the next YAML key like GH_AW_AGENT_STDIO_LOG). 
+ var out strings.Builder + for _, line := range strings.Split(rest, "\n") { + if !strings.HasPrefix(line, indent) { + break + } + out.WriteString(strings.TrimPrefix(line, indent)) + out.WriteString("\n") + } + return strings.TrimSpace(out.String()) +} From 5194f4bef2915b932ff63ab5e8a3b250760d2bd0 Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sat, 6 Jun 2026 19:42:17 +0100 Subject: [PATCH 5/8] fix(samples): emit `[]` not `null` when no samples configured Observed in CI: Error: apply_samples: GH_AW_SAMPLES must be a JSON array at loadSamples (apply_samples.cjs:61:11) Root cause: when a workflow opts into --use-samples but configures no `samples:` entries (or only on disabled handlers), collectSampleEntries returns a nil Go slice. json.Marshal(nil) produces the literal string "null", which the driver rightly refuses to treat as an array. Compiler fix (pkg/workflow/samples_replay.go): normalize a nil entries slice to an empty []SampleEntry{} before marshaling so GH_AW_SAMPLES is always emitted as a valid JSON array ("[]" in the empty case). Driver defense (actions/setup/js/apply_samples.cjs): also tolerate a literal JSON `null` payload and treat it as "no samples to replay", so an older compiler against a newer driver doesn't crash either. 
Tests: - new Go integration test TestUseSamplesEmitsEmptyArrayWhenNoSamplesConfigured compiles a workflow that uses --use-samples with safe-outputs but no samples entries, then asserts GH_AW_SAMPLES is exactly "[]" (and emphatically not "null") - new vitest spec verifies the driver exits 0 on GH_AW_SAMPLES="null" and logs "GH_AW_SAMPLES is null" --- actions/setup/js/apply_samples.cjs | 6 +++ actions/setup/js/apply_samples.test.cjs | 25 ++++++++++ pkg/workflow/samples_replay.go | 9 ++++ pkg/workflow/samples_replay_test.go | 62 +++++++++++++++++++++++++ 4 files changed, 102 insertions(+) diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs index 73311b28c0e..ce54bf29e07 100644 --- a/actions/setup/js/apply_samples.cjs +++ b/actions/setup/js/apply_samples.cjs @@ -57,6 +57,12 @@ function loadSamples() { } catch (err) { throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${/** @type {Error} */ err.message}`); } + // Tolerate a literal JSON `null` payload (older compiler emitted it for + // workflows with --use-samples but no `samples:` entries). Treat as empty. + if (parsed === null) { + console.error("apply_samples: GH_AW_SAMPLES is null — treating as no samples to replay."); + return []; + } if (!Array.isArray(parsed)) { throw new Error("apply_samples: GH_AW_SAMPLES must be a JSON array"); } diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs index 1546b8435de..9b2963bdead 100644 --- a/actions/setup/js/apply_samples.test.cjs +++ b/actions/setup/js/apply_samples.test.cjs @@ -131,6 +131,31 @@ describe.sequential("apply_samples.cjs", () => { const logText = fs.readFileSync(path.join(tempDir, "empty-log.log"), "utf8"); expect(logText).toContain("terminal_reason"); }); + + // Defense in depth: an older compiler that marshaled a nil Go slice would + // emit `null` into GH_AW_SAMPLES. 
Newer drivers must tolerate that and + // treat it as "no samples", not crash with `must be a JSON array`. + it("exits cleanly when GH_AW_SAMPLES is the literal `null`", () => { + const logPath = path.join(tempDir, "null-log.log"); + const result = spawnSync(process.execPath, [driverPath], { + env: { + ...process.env, + GH_AW_SAMPLES: "null", + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: configPath, + GH_AW_SAFE_OUTPUTS: outputsPath, + GH_AW_AGENT_STDIO_LOG: logPath, + }, + encoding: "utf8", + timeout: 10000, + }); + + if (result.status !== 0) { + throw new Error(`driver exited with status ${result.status}\nstderr:\n${result.stderr}\nstdout:\n${result.stdout}`); + } + expect(result.stderr).toContain("GH_AW_SAMPLES is null"); + const logText = fs.readFileSync(logPath, "utf8"); + expect(logText).toContain("terminal_reason"); + }); }); describe("apply_samples.cjs preStagePatch (create_pull_request / push_to_pull_request_branch)", () => { diff --git a/pkg/workflow/samples_replay.go b/pkg/workflow/samples_replay.go index c0f49d804a8..fec6a477f6b 100644 --- a/pkg/workflow/samples_replay.go +++ b/pkg/workflow/samples_replay.go @@ -75,6 +75,15 @@ func (c *Compiler) generateSamplesReplayStep(yaml *strings.Builder, data *Workfl entries := collectSampleEntries(data.SafeOutputs) compilerYamlLog.Printf("Generating samples replay step: entries=%d", len(entries)) + // Normalize a nil slice to an empty slice so json.Marshal emits "[]" not "null". + // The driver rejects anything that isn't a JSON array; emitting "null" here + // would crash the replay step with `GH_AW_SAMPLES must be a JSON array` for + // workflows that opt into --use-samples but configure no samples (or whose + // configured samples all live on disabled handlers). + if entries == nil { + entries = []SampleEntry{} + } + // Serialize entries to JSON for the driver. Always emit valid JSON even when // empty so the driver can produce a clear `no samples configured` message // rather than crashing on an empty env var. 
diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go index 609fd58be45..df1ac109d3f 100644 --- a/pkg/workflow/samples_replay_test.go +++ b/pkg/workflow/samples_replay_test.go @@ -274,3 +274,65 @@ func extractGHAWSamplesJSON(t *testing.T, lock string) string { } return strings.TrimSpace(out.String()) } + +// TestUseSamplesEmitsEmptyArrayWhenNoSamplesConfigured guards against a +// regression where compiling with --use-samples but no `samples:` entries on +// any enabled handler caused json.Marshal of a nil Go slice to emit the +// literal string "null" into GH_AW_SAMPLES, which the driver rightly +// rejected with `GH_AW_SAMPLES must be a JSON array`. The compiler must +// emit "[]" instead so the driver can exit cleanly with `no samples to +// replay`. +func TestUseSamplesEmitsEmptyArrayWhenNoSamplesConfigured(t *testing.T) { + // Workflow opts into --use-samples and configures safe-outputs but has + // no `samples:` entries on the create-issue handler. + const md = `--- +on: + workflow_dispatch: +permissions: read-all +engine: + id: claude +safe-outputs: + create-issue: + title-prefix: "[no-samples] " +--- + +Workflow with safe-outputs but no samples — should still compile and +emit a valid empty-array GH_AW_SAMPLES under --use-samples. +` + + tmpFile, err := os.CreateTemp("", "use-samples-empty-*.md") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpFile.Name()) + if _, err := tmpFile.WriteString(md); err != nil { + t.Fatal(err) + } + tmpFile.Close() + + compiler := NewCompiler() + compiler.SetUseSamples(true) + if err := compiler.CompileWorkflow(tmpFile.Name()); err != nil { + t.Fatalf("compile failed: %v", err) + } + lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml" + defer os.Remove(lockPath) + b, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("read lock: %v", err) + } + lock := string(b) + + // Must still emit the replay step. 
+ if !strings.Contains(lock, "Replay safe-outputs samples (deterministic)") { + t.Fatal("Expected replay step in lock file even with no samples configured") + } + + samplesJSON := extractGHAWSamplesJSON(t, lock) + if samplesJSON == "null" { + t.Fatalf("GH_AW_SAMPLES must not be the literal `null` (driver would reject it); got %q", samplesJSON) + } + if samplesJSON != "[]" { + t.Fatalf("GH_AW_SAMPLES = %q, want %q", samplesJSON, "[]") + } +} From 9253ebe1011eb14aeae0b9ce1639dd25bc8882f7 Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sat, 6 Jun 2026 19:58:28 +0100 Subject: [PATCH 6/8] fix(samples): CI failures + review feedback CI fixes: - pkg/workflow/samples_replay.go: switch to strings.SplitSeq per the modernize linter (lint-go was failing) - actions/setup/js/apply_samples.cjs: weaken the JSDoc type on sendJsonRpc's child parameter from ChildProcessWithoutNullStreams to ChildProcess so the value returned by spawn() with stdio: ["pipe", "pipe", "inherit"] (which has a null stderr) type-checks (js-typecheck was failing) Review feedback (all Copilot inline comments): - apply_samples.cjs: replace the /** @type {Error} */ casts on catch bindings with the shared getErrorMessage(err) helper so catch-unknown narrowing is actually safe under @ts-check - samples_replay_test.go: stop swallowing the ReadFile error in the Use-Samples-Mode subtest; t.Fatalf on failure like the default-mode subtest does - samples_validation.go: stripSidecarFields now always returns a fresh map, matching its doc comment (no more accidental aliasing of the caller's input when sidecars is empty) - safe_outputs_config.go: drop the YAML-string branch of parseSamplesValue; the JSON schema for samples only allows array/object, so the string form would be rejected upstream before this code runs. Removes the now-unused yaml import. The Copilot comment about collectSampleEntries emitting null was addressed in the prior commit (5194f4bef2) which normalizes nil to []SampleEntry{} before json.Marshal. 
--- actions/setup/js/apply_samples.cjs | 7 ++++--- pkg/workflow/safe_outputs_config.go | 24 +++++------------------- pkg/workflow/samples_replay.go | 2 +- pkg/workflow/samples_replay_test.go | 5 ++++- pkg/workflow/samples_validation.go | 6 ++---- 5 files changed, 16 insertions(+), 28 deletions(-) diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs index ce54bf29e07..c5d3f6924b2 100644 --- a/actions/setup/js/apply_samples.cjs +++ b/actions/setup/js/apply_samples.cjs @@ -29,6 +29,7 @@ const { spawn } = require("child_process"); const fs = require("fs"); const path = require("path"); const os = require("os"); +const { getErrorMessage } = require("./error_helpers.cjs"); const DEFAULT_BASE_BRANCH = process.env.GH_AW_CUSTOM_BASE_BRANCH || process.env.GITHUB_BASE_REF || process.env.GITHUB_REF_NAME || "main"; const PATCH_SIDECAR_TOOLS = new Set(["create_pull_request", "push_to_pull_request_branch"]); @@ -55,7 +56,7 @@ function loadSamples() { try { parsed = JSON.parse(raw); } catch (err) { - throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${/** @type {Error} */ err.message}`); + throw new Error(`apply_samples: failed to parse GH_AW_SAMPLES as JSON: ${getErrorMessage(err)}`); } // Tolerate a literal JSON `null` payload (older compiler emitted it for // workflows with --use-samples but no `samples:` entries). Treat as empty. @@ -131,7 +132,7 @@ function preStagePatch(entry, index, workspace) { try { runGit(["checkout", DEFAULT_BASE_BRANCH], workspace); } catch (err) { - console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${/** @type {Error} */ err.message}; staying on current HEAD`); + console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${getErrorMessage(err)}; staying on current HEAD`); } // Create the branch (or check it out if it already exists from a previous sample). 
@@ -159,7 +160,7 @@ function preStagePatch(entry, index, workspace) { /** * Send a single JSON-RPC request to the MCP server child process and resolve * with the parsed JSON response (or reject on timeout). - * @param {import("child_process").ChildProcessWithoutNullStreams} child + * @param {import("child_process").ChildProcess} child * @param {NodeJS.WritableStream} stdin * @param {object} request * @param {AsyncIterableIterator} responseIterator diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go index 1a7541c8eef..3899b7c28e2 100644 --- a/pkg/workflow/safe_outputs_config.go +++ b/pkg/workflow/safe_outputs_config.go @@ -9,7 +9,6 @@ import ( "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/sliceutil" "github.com/github/gh-aw/pkg/typeutil" - "go.yaml.in/yaml/v3" ) var safeOutputsConfigLog = logger.New("workflow:safe_outputs_config") @@ -767,8 +766,8 @@ func (c *Compiler) parseBaseSafeOutputConfig(configMap map[string]any, config *B // Parse samples list (hidden feature: deterministic replay samples for --use-samples). // Accepts either a YAML list of objects, or a single object that is auto-wrapped - // into a one-element list, or a YAML string scalar containing a list (for - // authoring convenience with `|` block scalars in frontmatter). + // into a one-element list. The JSON schema rejects scalar/string shapes so we + // don't need a defensive YAML-string branch here. if samples, exists := configMap["samples"]; exists { parsed := parseSamplesValue(samples) if len(parsed) > 0 { @@ -779,14 +778,12 @@ func (c *Compiler) parseBaseSafeOutputConfig(configMap map[string]any, config *B } // parseSamplesValue normalizes a `samples` frontmatter value into a list of -// objects. Accepted shapes (most-permissive first): +// objects. 
Accepted shapes: // - YAML list of mappings: returned as-is // - single YAML mapping: wrapped into a one-element list -// - YAML string containing a list/mapping (authoring with `|` block scalar): -// parsed as YAML and re-normalized // -// Any other shape returns an empty slice — schema validation will then report -// "no samples found". +// Any other shape returns an empty slice — schema validation rejects those +// shapes upstream and we keep this parser strict to match. func parseSamplesValue(samples any) []map[string]any { switch v := samples.(type) { case []any: @@ -805,17 +802,6 @@ func parseSamplesValue(samples any) []map[string]any { return out case map[string]any: return []map[string]any{v} - case string: - trimmed := strings.TrimSpace(v) - if trimmed == "" { - return nil - } - var nested any - if err := yaml.Unmarshal([]byte(trimmed), &nested); err != nil { - safeOutputsConfigLog.Printf("Failed to parse samples string as YAML: %v", err) - return nil - } - return parseSamplesValue(nested) default: return nil } diff --git a/pkg/workflow/samples_replay.go b/pkg/workflow/samples_replay.go index fec6a477f6b..28f16afe1c4 100644 --- a/pkg/workflow/samples_replay.go +++ b/pkg/workflow/samples_replay.go @@ -99,7 +99,7 @@ func (c *Compiler) generateSamplesReplayStep(yaml *strings.Builder, data *Workfl yaml.WriteString(" id: agentic_execution\n") yaml.WriteString(" env:\n") yaml.WriteString(" GH_AW_SAMPLES: |\n") - for _, line := range strings.Split(string(payload), "\n") { + for line := range strings.SplitSeq(string(payload), "\n") { fmt.Fprintf(yaml, " %s\n", line) } fmt.Fprintf(yaml, " GH_AW_AGENT_STDIO_LOG: %s\n", logFile) diff --git a/pkg/workflow/samples_replay_test.go b/pkg/workflow/samples_replay_test.go index df1ac109d3f..ac8232fd825 100644 --- a/pkg/workflow/samples_replay_test.go +++ b/pkg/workflow/samples_replay_test.go @@ -75,7 +75,10 @@ Trivial workflow whose only job is to be compiled with --use-samples. 
} lockPath := strings.TrimSuffix(tmpFile.Name(), ".md") + ".lock.yml" defer os.Remove(lockPath) - b, _ := os.ReadFile(lockPath) + b, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("read lock: %v", err) + } lockContent := string(b) if !strings.Contains(lockContent, "Replay safe-outputs samples (deterministic)") { t.Error("Expected `Replay safe-outputs samples (deterministic)` step in lock file") diff --git a/pkg/workflow/samples_validation.go b/pkg/workflow/samples_validation.go index 79bfbc0f5f0..b04219aa23c 100644 --- a/pkg/workflow/samples_validation.go +++ b/pkg/workflow/samples_validation.go @@ -144,11 +144,9 @@ func validateSamplesForTool(toolName string, samples []map[string]any) error { } // stripSidecarFields returns a shallow copy of sample with sidecar keys removed. -// The original map is not modified. +// The original map is never modified, even when no sidecars are configured — +// callers may mutate the returned map without affecting the caller's input. func stripSidecarFields(sample map[string]any, sidecars map[string]bool) map[string]any { - if len(sidecars) == 0 { - return sample - } out := make(map[string]any, len(sample)) for k, v := range sample { if sidecars[k] { From a7798979d5e8f06a2e5beee9373ae57164afd552 Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sun, 7 Jun 2026 00:04:20 +0100 Subject: [PATCH 7/8] Address pelikhan review: require shim.cjs and use core.* in apply_samples driver --- actions/setup/js/apply_samples.cjs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs index c5d3f6924b2..0a3dc7844cf 100644 --- a/actions/setup/js/apply_samples.cjs +++ b/actions/setup/js/apply_samples.cjs @@ -1,5 +1,6 @@ #!/usr/bin/env node // @ts-check +/// // apply_samples.cjs // @@ -25,6 +26,8 @@ // GITHUB_WORKSPACE — git working directory for pre-staging (optional; // falls back to cwd) +require("./shim.cjs"); + const { spawn } = 
require("child_process"); const fs = require("fs"); const path = require("path"); @@ -49,7 +52,7 @@ const PATCH_SIDECAR_TOOLS = new Set(["create_pull_request", "push_to_pull_reques function loadSamples() { const raw = process.env.GH_AW_SAMPLES; if (!raw || !raw.trim()) { - console.error("apply_samples: GH_AW_SAMPLES is empty — no samples to replay."); + core.warning("apply_samples: GH_AW_SAMPLES is empty — no samples to replay."); return []; } let parsed; @@ -61,7 +64,7 @@ function loadSamples() { // Tolerate a literal JSON `null` payload (older compiler emitted it for // workflows with --use-samples but no `samples:` entries). Treat as empty. if (parsed === null) { - console.error("apply_samples: GH_AW_SAMPLES is null — treating as no samples to replay."); + core.warning("apply_samples: GH_AW_SAMPLES is null — treating as no samples to replay."); return []; } if (!Array.isArray(parsed)) { @@ -132,7 +135,7 @@ function preStagePatch(entry, index, workspace) { try { runGit(["checkout", DEFAULT_BASE_BRANCH], workspace); } catch (err) { - console.error(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${getErrorMessage(err)}; staying on current HEAD`); + core.warning(`apply_samples: could not check out base branch ${DEFAULT_BASE_BRANCH}: ${getErrorMessage(err)}; staying on current HEAD`); } // Create the branch (or check it out if it already exists from a previous sample). 
@@ -258,13 +261,13 @@ async function main() { }); if (samples.length === 0) { - console.error("apply_samples: nothing to replay; exiting cleanly."); + core.info("apply_samples: nothing to replay; exiting cleanly."); writeSyntheticStdioLog(logPath, 0); return; } const serverPath = resolveMcpServerPath(); - console.error(`apply_samples: spawning MCP server ${serverPath}`); + core.info(`apply_samples: spawning MCP server ${serverPath}`); const child = spawn(process.execPath, [serverPath], { stdio: ["pipe", "pipe", "inherit"], env: process.env, @@ -317,7 +320,7 @@ async function main() { const text = result.content && result.content[0] && result.content[0].text; failures.push(`sample[${i}] (tool=${sample.tool}): ${text || JSON.stringify(result)}`); } else { - console.error(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`); + core.info(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`); } } } finally { @@ -348,13 +351,12 @@ async function main() { if (failures.length > 0) { throw new Error(`apply_samples: ${failures.length} sample(s) failed:\n - ${failures.join("\n - ")}`); } - console.error(`apply_samples: ${samples.length} sample(s) replayed successfully.`); + core.info(`apply_samples: ${samples.length} sample(s) replayed successfully.`); } if (require.main === module) { main().catch(err => { - console.error(err && err.stack ? err.stack : String(err)); - process.exit(1); + core.setFailed(err && err.stack ? err.stack : String(err)); }); } From fcba545d94dcad3690df68d6ab0c87cbf0d3c80f Mon Sep 17 00:00:00 2001 From: Don Syme Date: Sun, 7 Jun 2026 00:10:30 +0100 Subject: [PATCH 8/8] Load shim.cjs in safe_outputs_mcp_server so spawned child has global.core When apply_samples.cjs spawns safe_outputs_mcp_server.cjs as a standalone Node child process, handlers like create_pull_request.cjs that reference core.info/warning/debug throw ReferenceError: core is not defined. 
The shim is idempotent (guarded by 'if (!global.core)'), so loading it unconditionally is safe when the module is required from a parent that already initialized it. --- actions/setup/js/safe_outputs_mcp_server.cjs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/actions/setup/js/safe_outputs_mcp_server.cjs b/actions/setup/js/safe_outputs_mcp_server.cjs index aca4f49cd93..3e1c2c54457 100644 --- a/actions/setup/js/safe_outputs_mcp_server.cjs +++ b/actions/setup/js/safe_outputs_mcp_server.cjs @@ -1,4 +1,5 @@ // @ts-check +/// // Safe Outputs MCP Server Module // @@ -12,6 +13,11 @@ // const server = require("./safe_outputs_mcp_server.cjs"); // server.startSafeOutputsServer(); +// Load core/context shim so handlers that reference `core.*` (e.g. +// create_pull_request.cjs) work when this file is spawned directly as a +// child process (e.g. by apply_samples.cjs) outside the github-script runtime. +require("./shim.cjs"); + const { createServer, registerTool, normalizeTool, start } = require("./mcp_server_core.cjs"); const { createAppendFunction } = require("./safe_outputs_append.cjs"); const { createHandlers } = require("./safe_outputs_handlers.cjs");