From edbc6a8fad5f62583ea544f38c3953ec8f89cb46 Mon Sep 17 00:00:00 2001 From: Danijel Martinek Date: Wed, 13 May 2026 19:43:11 +0200 Subject: [PATCH] feat(work): dispatch loops + auto-ticks state on approve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the orchestrator ran exactly one implementer + reviewer pair, printed "(Automatic state mutation by the orchestrator is v2.)", and exited — the human had to tick the bullet, flip story status, rebuild state, and re-invoke for every slice. V2 closes the loop: - Parses the JSON the implementer + reviewer prompts ask the agents to emit (`parseAgentJson` — tolerates both ```json fenced and bare trailing { ... } shapes). The reviewer's `decision` and the implementer's `status` are the orchestrator's discriminators. - On approve: ticks the bullet in `_story.md` and writes it back. If the story now has zero unchecked bullets, flips its frontmatter `status: in-progress → done`; if all sibling stories are also done, flips the epic's frontmatter the same way. Commits the mutation on the host as a separate `chore(work): tick/finish ...` commit so the implementer's slice commit stays clean. `_state.json` regenerates via the existing pre-commit `rebuild-state` hook. - On reject: re-dispatches the implementer with the reviewer's notes appended to TASK_FILE_CONTENT, bounded by SANDCASTLE_MAX_ATTEMPTS (default 3). After SANDCASTLE_MAX_ATTEMPTS rejected attempts on the same slice the loop exits 1 with the last notes printed. - After every approved slice, calls findNextTask again and dispatches the next ready bullet — including across story boundaries (the state-builder treats any non-done story with satisfied deps as ready, so flipping story 01 to done unblocks story 02 automatically). - Flags: `--once` (legacy single-slice behavior) and `--max-tasks N` bound the loop. Default is unlimited — matches the continuous-execution preference. 
Auth/sandbox setup is now pulled out of the per-iteration path so the loop reuses one sandbox across slices. --- .env.example | 6 + scripts/work/cli.mjs | 10 +- scripts/work/dispatch.mjs | 488 ++++++++++++++++++++++++++++++-------- 3 files changed, 398 insertions(+), 106 deletions(-) diff --git a/.env.example b/.env.example index 2980ba0..9341a9d 100644 --- a/.env.example +++ b/.env.example @@ -82,3 +82,9 @@ CMS_URL=http://localhost:3001 # SANDCASTLE_DECOMPOSE_ITERATIONS=10 # decompose: read PRD, write epic + stories, commit # SANDCASTLE_IMPLEMENTER_ITERATIONS=30 # implementer: full TDD slice (red test → green impl → gates → commit) # SANDCASTLE_REVIEWER_ITERATIONS=10 # reviewer: read diff + task, return decision + +# Reject-cycle cap. After this many reviewer rejects on the same slice, the +# dispatch loop gives up on that slice and exits 1 with the last rejection +# notes printed. Bump for tricky slices; lower for fast-feedback iteration. +# +# SANDCASTLE_MAX_ATTEMPTS=3 diff --git a/scripts/work/cli.mjs b/scripts/work/cli.mjs index 24a8311..dab72b3 100644 --- a/scripts/work/cli.mjs +++ b/scripts/work/cli.mjs @@ -13,7 +13,13 @@ * ready Prints every ready story * blocked Prints every blocked story + what each is waiting on * dispatch Print the next dispatch plan; with --execute invokes - * sandcastle to run the implementer + reviewer pair + * sandcastle to run the implementer + reviewer pair. + * --execute LOOPS through every ready task by default; + * bound with --once or --max-tasks N. After each + * approved slice the orchestrator ticks the bullet, + * flips story/epic status if complete, and commits + * the state mutation as `chore(work): ...` on top of + * the implementer's slice commit. 
* decompose Validate an approved PRD + print the decompose plan; * with --execute invokes sandcastle's decomposer agent * to write the epic folder + per-story files @@ -135,7 +141,7 @@ function usage() { "Usage: pnpm work ", ); console.log( - " dispatch Print the next dispatch plan (use --execute to invoke sandcastle)", + " dispatch Print the next dispatch plan (use --execute to invoke sandcastle; loops by default — bound with --once / --max-tasks N)", ); console.log( " decompose Decompose an approved PRD into epic + stories (use --execute to invoke sandcastle)", diff --git a/scripts/work/dispatch.mjs b/scripts/work/dispatch.mjs index 473cacd..d532d91 100644 --- a/scripts/work/dispatch.mjs +++ b/scripts/work/dispatch.mjs @@ -12,7 +12,7 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; import { fileURLToPath } from "node:url"; -import { execSync } from "node:child_process"; +import { execSync, execFileSync } from "node:child_process"; import { buildState } from "./state-builder.mjs"; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -70,10 +70,12 @@ export function findFirstUncheckedBullet(content) { /** * Builds the task spec string passed to sandcastle as TASK_FILE_CONTENT. - * The implementer prompt template uses this verbatim. + * The implementer prompt template uses this verbatim. An optional + * `rejection_notes` argument is appended when the orchestrator re-dispatches + * the implementer after a reviewer reject. */ -export function buildTaskSpec(next) { - return `# Current task +export function buildTaskSpec(next, rejectionNotes = null) { + const base = `# Current task ## Epic ${next.epic} @@ -87,6 +89,131 @@ ${next.bulletLine.trim()} ## Full story for context ${next.storyContent}`; + if (!rejectionNotes) return base; + return `${base} + +## Previous attempt was REJECTED — fix these before re-committing + +${rejectionNotes}`; +} + +/** + * Extract the LAST structured JSON object emitted by the agent. 
The + * implementer + reviewer prompts both ask the agent to return JSON; in + * practice agents wrap it in a \`\`\`json ... \`\`\` fence, but we tolerate + * a bare \`{ ... }\` block at the end of stdout too. Returns null on no + * parsable match. + */ +export function parseAgentJson(stdout) { + if (!stdout) return null; + // 1. Code-fenced JSON: take the LAST ```json ... ``` block. + const fenceMatches = [...stdout.matchAll(/```json\s*\n([\s\S]*?)\n\s*```/g)]; + if (fenceMatches.length > 0) { + const inner = fenceMatches[fenceMatches.length - 1][1].trim(); + try { + return JSON.parse(inner); + } catch { + // fall through to bare-brace fallback + } + } + // 2. Bare braces: walk backwards from the last "}" to its match. Defensive + // against partial output or extra trailing characters from the completion + // signal. + const lastClose = stdout.lastIndexOf("}"); + if (lastClose === -1) return null; + let depth = 0; + for (let i = lastClose; i >= 0; i--) { + if (stdout[i] === "}") depth++; + else if (stdout[i] === "{") depth--; + if (depth === 0) { + const candidate = stdout.slice(i, lastClose + 1); + try { + return JSON.parse(candidate); + } catch { + return null; + } + } + } + return null; +} + +/** + * Replace the `- [ ]` checkbox at the given line index with `- [x]`. Pure + * over the file's text — returns the new content. + */ +export function tickBulletInContent(content, bulletIndex) { + const lines = content.split("\n"); + if (bulletIndex < 0 || bulletIndex >= lines.length) return content; + lines[bulletIndex] = lines[bulletIndex].replace(/\[\s\]/, "[x]"); + return lines.join("\n"); +} + +/** + * Count remaining `- [ ]` checkboxes inside the `## Tasks` section. 
+ */ +export function countUncheckedBullets(content) { + const lines = content.split("\n"); + let inTasks = false; + let count = 0; + for (const line of lines) { + if (line.startsWith("## ")) { + inTasks = /^##\s+Tasks\b/i.test(line); + continue; + } + if (!inTasks) continue; + if (/^[\s>-]*\[\s\]/.test(line)) count++; + } + return count; +} + +/** + * Edit the `status:` line inside the leading `---\n...\n---` frontmatter + * block. Returns the new content, or the original if no frontmatter or no + * `status:` key was found. + */ +export function setFrontmatterStatus(content, newStatus) { + const fmMatch = content.match(/^(---\n)([\s\S]+?)(\n---)/); + if (!fmMatch) return content; + const [full, openDelim, body, closeDelim] = fmMatch; + if (!/^status:\s*/m.test(body)) return content; + const newBody = body.replace(/^status:\s*.*$/m, `status: ${newStatus}`); + return content.replace(full, `${openDelim}${newBody}${closeDelim}`); +} + +/** + * Read the `status:` value from frontmatter. Returns null on no frontmatter + * or no key. + */ +export function readFrontmatterStatus(content) { + const fmMatch = content.match(/^---\n([\s\S]+?)\n---/); + if (!fmMatch) return null; + const m = fmMatch[1].match(/^status:\s*(.*)$/m); + return m ? m[1].trim() : null; +} + +/** + * If all stories in an epic are `status: done`, flip the epic's own + * frontmatter to `status: done`. Returns true if it flipped, false otherwise. 
+ */ +export function flipEpicDoneIfAllStoriesDone(workRoot, epicId) { + const epicDir = path.join(workRoot, epicId); + const epicFile = path.join(epicDir, "_epic.md"); + if (!fs.existsSync(epicFile)) return false; + const epicContent = fs.readFileSync(epicFile, "utf8"); + if (readFrontmatterStatus(epicContent) === "done") return false; + + for (const sub of fs.readdirSync(epicDir)) { + const subPath = path.join(epicDir, sub); + if (!fs.statSync(subPath).isDirectory()) continue; + const storyFile = path.join(subPath, "_story.md"); + if (!fs.existsSync(storyFile)) continue; + const storyStatus = readFrontmatterStatus( + fs.readFileSync(storyFile, "utf8"), + ); + if (storyStatus !== "done") return false; + } + fs.writeFileSync(epicFile, setFrontmatterStatus(epicContent, "done")); + return true; } /** @@ -158,18 +285,206 @@ function printPlan() { " - With API key: `ANTHROPIC_API_KEY=... pnpm work dispatch --execute`", ); console.log(); + console.log( + "By default --execute LOOPS through every ready task. Flags to bound it:", + ); + console.log(" --once stop after one approved slice"); + console.log(" --max-tasks N stop after N approved slices"); + console.log(); console.log( "(Execute mode requires @ai-hero/sandcastle, a sandbox provider, and auth — see above.)", ); } -async function executeDispatch() { - const next = findNextTask(); - if (!next) { - console.log("No ready task to dispatch."); - process.exit(0); +/** + * Print the macOS / sandcastle-image / auth hints when a sandcastle run + * blows up. Shared between implementer + reviewer error paths. + */ +function explainSandcastleError(stage, e) { + console.error(`✗ ${stage} dispatch failed:`, e.message); + if (/Image '.+' not found locally/.test(e.message ?? "")) { + console.error(" One-time setup: pnpm exec sandcastle docker build-image"); + } + if ( + /Not logged in|Please run \/login/.test(e.message ?? 
"") && + process.platform === "darwin" + ) { + console.error( + " macOS users: Claude Code stores credentials in the Keychain, not in ~/.claude/. Extract once:", + ); + console.error( + ` security find-generic-password -s "Claude Code-credentials" -a "$USER" -w > ~/.claude/.credentials.json`, + ); + console.error(" chmod 600 ~/.claude/.credentials.json"); + console.error( + " OR fall back to API key: export ANTHROPIC_API_KEY=sk-ant-...", + ); + } + console.error(" See docs/guides/runbook.md → 'Using Sandcastle' for setup."); +} + +/** + * Run one slice end-to-end: implementer + reviewer, with a fix-up cycle on + * reject (capped at maxAttempts). Returns: + * { outcome: "approved", attempts, implJson, reviewJson } + * { outcome: "rejected-final", attempts, lastRejectNotes } + * { outcome: "blocked", attempts, implJson } + * { outcome: "error", attempts, reason } + */ +async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) { + const maxAttempts = Number(process.env.SANDCASTLE_MAX_ATTEMPTS ?? 3); + const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md"); + const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md"); + + let rejectionNotes = null; + let lastRejectNotes = null; + let attempts = 0; + while (attempts < maxAttempts) { + attempts++; + const taskSpec = buildTaskSpec(next, rejectionNotes); + + let implResult; + try { + implResult = await sandcastleRoot.run({ + agent, + sandbox, + promptFile: implementerPrompt, + promptArgs: { TASK_FILE_CONTENT: taskSpec }, + cwd: REPO_ROOT, + // Implementer runs a full TDD slice (read context, red test, green + // impl, run all five gates, commit). 30 iterations matches typical + // slice shape. Tune via env SANDCASTLE_IMPLEMENTER_ITERATIONS. + maxIterations: Number( + process.env.SANDCASTLE_IMPLEMENTER_ITERATIONS ?? 30, + ), + // Stop iterating the moment the agent emits this marker. 
Without + // it, sandcastle re-invokes the model up to maxIterations even + // when the work is already done. + completionSignal: "COMPLETE", + }); + } catch (e) { + explainSandcastleError("Implementer", e); + return { outcome: "error", attempts, reason: e.message }; + } + console.log( + `Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`, + ); + const implJson = parseAgentJson(implResult.stdout); + if ( + implJson?.status === "blocked" || + implJson?.status === "needs-clarification" + ) { + return { outcome: "blocked", attempts, implJson }; + } + + let diff = ""; + try { + diff = execSync(`git diff main..${implResult.branch}`, { + encoding: "utf8", + cwd: REPO_ROOT, + }); + } catch { + diff = "(diff unavailable)"; + } + + let reviewResult; + try { + reviewResult = await sandcastleRoot.run({ + agent, + sandbox, + promptFile: reviewerPrompt, + promptArgs: { TASK_FILE_CONTENT: taskSpec, DIFF: diff }, + cwd: REPO_ROOT, + // Reviewer reads the diff + task spec and decides (approve/reject). + // Smaller surface than the implementer; 10 iterations is plenty. + // Tune via env SANDCASTLE_REVIEWER_ITERATIONS. + maxIterations: Number(process.env.SANDCASTLE_REVIEWER_ITERATIONS ?? 10), + // See implementer comment above. + completionSignal: "COMPLETE", + }); + } catch (e) { + explainSandcastleError("Reviewer", e); + return { outcome: "error", attempts, reason: e.message }; + } + const reviewJson = parseAgentJson(reviewResult.stdout); + + if (reviewJson?.decision === "approve") { + return { outcome: "approved", attempts, implJson, reviewJson }; + } + if (reviewJson?.decision === "reject") { + rejectionNotes = + reviewJson.notes ?? "(reviewer rejected without notes — re-attempt)"; + lastRejectNotes = rejectionNotes; + console.log( + `↺ Attempt ${attempts}/${maxAttempts} rejected. Re-dispatching implementer with notes.`, + ); + continue; + } + // Reviewer produced unparseable / non-decision output — treat as error. 
+ return { + outcome: "error", + attempts, + reason: `reviewer returned no parseable decision; stdout:\n${reviewResult.stdout}`, + }; + } + return { outcome: "rejected-final", attempts, lastRejectNotes }; +} + +/** + * After an `approved` slice: tick the bullet, flip the story status if all + * bullets are now ticked (or todo→in-progress on the first tick), flip the + * epic status if all its stories are done, and commit the mutation on the + * host. The implementer's slice commit is already on main; this is a + * separate bookkeeping commit so the slice commit stays clean. + */ +function applyApprovedState(next) { + let content = fs.readFileSync(next.storyPath, "utf8"); + content = tickBulletInContent(content, next.bulletIndex); + + const currentStatus = readFrontmatterStatus(content); + let storyFlipped = false; + if (countUncheckedBullets(content) === 0 && currentStatus !== "done") { + content = setFrontmatterStatus(content, "done"); + storyFlipped = true; + } else if (currentStatus === "todo") { + content = setFrontmatterStatus(content, "in-progress"); + } + fs.writeFileSync(next.storyPath, content); + + let epicFlipped = false; + if (storyFlipped) { + epicFlipped = flipEpicDoneIfAllStoriesDone(WORK_ROOT, next.epic); } + const filesToStage = [path.relative(REPO_ROOT, next.storyPath)]; + if (epicFlipped) { + filesToStage.push( + path.relative(REPO_ROOT, path.join(WORK_ROOT, next.epic, "_epic.md")), + ); + } + const commitMsg = epicFlipped + ? `chore(work): finish epic ${next.epic}` + : storyFlipped + ? `chore(work): finish ${next.story}` + : `chore(work): tick task in ${next.story}`; + + execFileSync("git", ["add", ...filesToStage], { cwd: REPO_ROOT }); + execFileSync("git", ["commit", "-m", commitMsg], { + cwd: REPO_ROOT, + stdio: "inherit", + }); + console.log(`✓ ${commitMsg}`); +} + +/** + * Pick a slice, dispatch implementer + reviewer (with reject fix-up cycle), + * apply state mutation on approve, loop until exhausted or a cap is hit. 
+ * + * Flags: + * --once stop after one slice (legacy behavior) + * --max-tasks N stop after N approved slices (default: unlimited) + */ +async function executeDispatch({ maxTasks }) { const auth = resolveClaudeAuth(); if (auth.mode === "missing") { console.error("✗ --execute requires either:"); @@ -186,10 +501,6 @@ async function executeDispatch() { console.log( `Auth mode: ${auth.mode === "subscription" ? `subscription (mounting ${auth.hostPath})` : "api-key"}`, ); - console.log( - `Dispatching: ${next.epic} / ${next.story} / ${next.bulletLine.trim()}`, - ); - const taskSpec = buildTaskSpec(next); let sandcastleRoot; let dockerProvider; @@ -204,7 +515,6 @@ async function executeDispatch() { process.exit(1); } - // Build sandbox + agent providers based on auth mode const dockerOpts = {}; const agentOpts = {}; if (auth.mode === "subscription") { @@ -221,111 +531,81 @@ async function executeDispatch() { const sandbox = dockerProvider(dockerOpts); const agent = sandcastleRoot.claudeCode("claude-sonnet-4-6", agentOpts); - // Implementer - const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md"); - let implResult; - try { - implResult = await sandcastleRoot.run({ - agent, - sandbox, - promptFile: implementerPrompt, - promptArgs: { TASK_FILE_CONTENT: taskSpec }, - cwd: REPO_ROOT, - // Implementer runs a full TDD slice (read context, red test, green - // impl, run all five gates, commit). 30 iterations matches typical - // slice shape. Tune via env SANDCASTLE_IMPLEMENTER_ITERATIONS. - maxIterations: Number( - process.env.SANDCASTLE_IMPLEMENTER_ITERATIONS ?? 30, - ), - // Stop iterating the moment the agent emits this marker. Without it, - // sandcastle re-invokes the model up to maxIterations even when the - // work is already done — the prompt instructs the agent to emit - // COMPLETE on its final line. 
- completionSignal: "COMPLETE", - }); - } catch (e) { - console.error("✗ Implementer dispatch failed:", e.message); - if (/Image '.+' not found locally/.test(e.message ?? "")) { - console.error( - " One-time setup: pnpm exec sandcastle docker build-image", - ); + let approved = 0; + while (true) { + if (maxTasks !== null && approved >= maxTasks) { + console.log(`\nHit --max-tasks=${maxTasks} cap; stopping.`); + break; } - if ( - /Not logged in|Please run \/login/.test(e.message ?? "") && - process.platform === "darwin" - ) { - console.error( - " macOS users: Claude Code stores credentials in the Keychain, not in ~/.claude/. Extract once:", - ); - console.error( - ` security find-generic-password -s "Claude Code-credentials" -a "$USER" -w > ~/.claude/.credentials.json`, - ); - console.error(" chmod 600 ~/.claude/.credentials.json"); - console.error( - " OR fall back to API key: export ANTHROPIC_API_KEY=sk-ant-...", - ); + const next = findNextTask(); + if (!next) { + console.log("\nNo more ready tasks. Dispatch loop complete."); + break; } - console.error( - " See docs/guides/runbook.md → 'Using Sandcastle' for setup.", + console.log( + `\n--- Slice ${approved + 1}: ${next.epic} / ${next.story} ---`, ); + console.log(` Bullet: ${next.bulletLine.trim()}`); + + const result = await runOneSlice({ sandcastleRoot, sandbox, agent, next }); + if (result.outcome === "approved") { + applyApprovedState(next); + approved++; + continue; + } + if (result.outcome === "rejected-final") { + console.error( + `\n✗ Slice rejected after ${result.attempts} attempts. Stopping dispatch loop.`, + ); + if (result.lastRejectNotes) { + console.error(`Last rejection notes:\n${result.lastRejectNotes}`); + } + process.exit(1); + } + if (result.outcome === "blocked") { + console.error( + `\n✗ Implementer reported ${result.implJson?.status ?? "blocked"}. 
Stopping dispatch loop.`, + ); + if (result.implJson?.notes) { + console.error(`Implementer notes:\n${result.implJson.notes}`); + } + process.exit(1); + } + // outcome === "error" + console.error(`\n✗ Slice errored: ${result.reason ?? "(no reason)"}`); process.exit(1); } - console.log( - `Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`, - ); - - // Reviewer - let diff = ""; - try { - diff = execSync(`git diff main..${implResult.branch}`, { - encoding: "utf8", - cwd: REPO_ROOT, - }); - } catch { - diff = "(diff unavailable)"; - } - const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md"); - const reviewResult = await sandcastleRoot.run({ - agent, - sandbox, - promptFile: reviewerPrompt, - promptArgs: { TASK_FILE_CONTENT: taskSpec, DIFF: diff }, - cwd: REPO_ROOT, - // Reviewer reads the diff + task spec and decides (approve/reject). - // Smaller surface than the implementer; 10 iterations is plenty. - // Tune via env SANDCASTLE_REVIEWER_ITERATIONS. - maxIterations: Number(process.env.SANDCASTLE_REVIEWER_ITERATIONS ?? 10), - // Stop iterating the moment the agent emits this marker. Without it, - // sandcastle re-invokes the model up to maxIterations even when the - // decision has already been returned — the prompt instructs the agent - // to emit COMPLETE on its final line. - completionSignal: "COMPLETE", - }); - console.log(`Reviewer returned. stdout follows:\n${reviewResult.stdout}`); - - // V1: orchestrator does NOT auto-mutate state. Print what should happen. 
- console.log(); - console.log("=== Suggested state mutation ==="); - console.log(` Edit ${next.storyPath} — tick the bullet:`); - console.log(` ${next.bulletLine.trim().replace("[ ]", "[x]")}`); - console.log( - ` Then: pnpm work rebuild-state && git add -A && git commit -m "feat(...): ..."`, - ); - console.log(); - console.log("(Automatic state mutation by the orchestrator is v2.)"); + console.log(`\nDispatched ${approved} slice(s).`); } /** * Explicit CLI entry. Exported so cli.mjs can dispatch into this module * without relying on a top-level side effect (which would also fire when * sibling work scripts import `resolveClaudeAuth`, etc.). + * + * Flags: + * --execute run sandcastle (default: print plan only) + * --once stop after one approved slice (default: loop until done) + * --max-tasks N stop after N approved slices */ export async function runCli(args) { - if (args.includes("--execute")) { - await executeDispatch(); - } else { + if (!args.includes("--execute")) { printPlan(); + return; } + let maxTasks = null; + if (args.includes("--once")) maxTasks = 1; + const maxTasksFlagIdx = args.indexOf("--max-tasks"); + if (maxTasksFlagIdx !== -1) { + const raw = args[maxTasksFlagIdx + 1]; + const parsed = Number(raw); + if (!Number.isFinite(parsed) || parsed < 1) { + console.error(`✗ --max-tasks expects a positive integer, got: ${raw}`); + process.exit(2); + } + maxTasks = parsed; + } + await executeDispatch({ maxTasks }); } // When invoked directly (`node scripts/work/dispatch.mjs ...`), run the CLI.