Files
agentic-dev/scripts/work/dispatch.mjs
Danijel Martinek 756e36c720 refactor(work): move epic folders into docs/work/epics/
The previous layout placed epic folders directly under docs/work/
alongside prds/ and _system/. Tightening: epics now live in their
own docs/work/epics/ subfolder, peer to prds/ and _system/. Same
shape as the existing prds/ bucket.

Final docs/work/ layout:
  README.md
  prds/<slug>.prd.md
  _system/_state.json
  epics/<slug>/_epic.md + <story-folder>/_story.md

Renames (git mv preserves history):
- docs/work/binder-wrap-helper/
    -> docs/work/epics/binder-wrap-helper/
- docs/work/library-evaluation-policy/
    -> docs/work/epics/library-evaluation-policy/
- docs/work/ci-security-and-supply-chain/
    -> docs/work/epics/ci-security-and-supply-chain/

Tooling updates:
- state-builder.mjs walks workRoot/epics/ directly; SKIP_FOLDERS
  obsoleted (no more sibling folders to filter out).
- dispatch.mjs's findNextTask, tickStoryBulletInEpic, and
  flipEpicDoneIfAllStoriesDone all join with "epics" segment.
- prd-ship.mjs's deriveShippingCommits walks workRoot/epics/ and
  git-logs docs/work/epics/<epic>/.
- decomposer.prompt.md emits epics under docs/work/epics/<epic-id>/.
- handoff + grill-with-docs glossary references updated.
- Glossary entry for Epic updated.

Reserved future shape: when a task-tracker integration (ClickUp,
Linear) ships, the epics/ subfolder hosts <task-id>-<slug>/
folders. Today it just hosts bare slugs.
2026-05-14 21:21:51 +02:00

667 lines
22 KiB
JavaScript

#!/usr/bin/env node
/**
* pnpm work dispatch — orchestrator that picks the next ready task and
* (with --execute) invokes sandcastle to run the implementer then reviewer.
*
* Default mode prints the dispatch plan without invoking sandcastle —
* safe to run anywhere. --execute requires EITHER:
* 1. Claude Code logged in on host (~/.claude/ — recommended for subscribers)
* 2. ANTHROPIC_API_KEY or OPENAI_API_KEY in env (fallback)
*/
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { execSync, execFileSync } from "node:child_process";
import { buildState } from "./state-builder.mjs";
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Repository root: two directory levels above the folder holding this script.
const REPO_ROOT = path.resolve(__dirname, "..", "..");
// Root of the work-tracking tree (<repo>/docs/work); default for findNextTask.
const WORK_ROOT = path.join(REPO_ROOT, "docs", "work");
// <repo>/.sandcastle — where the implementer/reviewer prompt files are read from.
const SANDCASTLE_DIR = path.join(REPO_ROOT, ".sandcastle");
/**
 * Locate the next unit of work: the first unchecked `## Tasks` bullet of the
 * first entry in `buildState(workRoot).ready`.
 *
 * @param {string} [workRoot=WORK_ROOT] - Work-tracking root. The story file is
 *   read from `<workRoot>/epics/<epic>/<story>/_story.md`.
 * @returns {object|null} `{ epic, story, title, storyPath, storyContent,
 *   bulletLine, bulletIndex }`, or null when no story is ready, the story
 *   file does not exist, or the story has no unchecked bullet left.
 */
export function findNextTask(workRoot = WORK_ROOT) {
  const { ready } = buildState(workRoot);
  if (ready.length === 0) return null;

  const { epic, story, title } = ready[0];
  const storyPath = path.join(workRoot, "epics", epic, story, "_story.md");
  if (!fs.existsSync(storyPath)) return null;

  const storyContent = fs.readFileSync(storyPath, "utf8");
  const firstOpen = findFirstUncheckedBullet(storyContent);
  if (firstOpen.bulletLine === null) return null;

  return {
    epic,
    story,
    title,
    storyPath,
    storyContent,
    bulletLine: firstOpen.bulletLine,
    bulletIndex: firstOpen.bulletIndex,
  };
}
/**
 * Find the first `[ ]` checkbox line that sits under a `## Tasks` heading.
 *
 * Any line starting with `## ` switches sections; only lines after a heading
 * matching `## Tasks` (case-insensitive) are considered. A checkbox may be
 * preceded by any mix of whitespace, `>` and `-` characters.
 *
 * @param {string} content - Full story file text.
 * @returns {{ bulletLine: string|null, bulletIndex: number }} The matched line
 *   and its 0-based index in `content.split("\n")`, or
 *   `{ bulletLine: null, bulletIndex: -1 }` when there is none.
 */
export function findFirstUncheckedBullet(content) {
  const tasksHeading = /^##\s+Tasks\b/i;
  const openCheckbox = /^[\s>-]*\[\s\]/;
  let insideTasks = false;

  for (const [index, text] of content.split("\n").entries()) {
    if (text.startsWith("## ")) {
      // Every level-2 heading re-evaluates whether we are in the Tasks section.
      insideTasks = tasksHeading.test(text);
    } else if (insideTasks && openCheckbox.test(text)) {
      return { bulletLine: text, bulletIndex: index };
    }
  }
  return { bulletLine: null, bulletIndex: -1 };
}
/**
 * Render the markdown task spec handed to sandcastle as TASK_FILE_CONTENT.
 *
 * The spec is a fixed sequence of headings and values joined by single
 * newlines. Note that the story id and title are emitted back-to-back on one
 * line with no separator between them.
 *
 * @param {object} next - Task descriptor; reads `epic`, `story`, `title`,
 *   `bulletLine` (trimmed before use) and `storyContent`.
 * @param {string|null} [rejectionNotes=null] - Reviewer notes from a rejected
 *   attempt. When truthy, a "Previous attempt was REJECTED" section is
 *   appended; when falsy, the base spec is returned unchanged.
 * @returns {string} The task spec.
 */
export function buildTaskSpec(next, rejectionNotes = null) {
  // Each dynamic value goes through a template literal so that undefined/null
  // stringify the same way they would inside one large template.
  const sections = [
    "# Current task",
    "## Epic",
    `${next.epic}`,
    "## Story",
    `${next.story}${next.title}`,
    "## Current bullet",
    `${next.bulletLine.trim()}`,
    "## Full story for context",
    `${next.storyContent}`,
  ];
  if (rejectionNotes) {
    sections.push(
      "## Previous attempt was REJECTED — fix these before re-committing",
      `${rejectionNotes}`,
    );
  }
  return sections.join("\n");
}
/**
 * Extract the LAST structured JSON object emitted by the agent.
 *
 * Two strategies are tried in order:
 *  1. The last ```json ... ``` fenced block, if it parses.
 *  2. A bare `{ ... }` object ending at the final `}` in the output. Opening
 *     braces are tried from right to left until the slice parses as JSON, so
 *     `{` / `}` characters inside string values (e.g. reviewer notes quoting
 *     code) cannot throw the search off the way plain brace counting would.
 *
 * @param {string} stdout - Raw agent output.
 * @returns {object|null} The parsed object, or null when stdout is empty or
 *   contains no parsable JSON object.
 */
export function parseAgentJson(stdout) {
  if (!stdout) return null;

  // 1. Code-fenced JSON: take the LAST ```json ... ``` block.
  const fenced = [...stdout.matchAll(/```json\s*\n([\s\S]*?)\n\s*```/g)];
  if (fenced.length > 0) {
    try {
      return JSON.parse(fenced[fenced.length - 1][1].trim());
    } catch {
      // Malformed fence — fall through to the bare-brace fallback.
    }
  }

  // 2. Bare braces: anchor on the last "}" (tolerates trailing text such as
  //    the completion signal) and let JSON.parse decide which "{" opens it.
  const lastClose = stdout.lastIndexOf("}");
  if (lastClose === -1) return null;

  let open = stdout.lastIndexOf("{", lastClose);
  while (open !== -1) {
    try {
      return JSON.parse(stdout.slice(open, lastClose + 1));
    } catch {
      // Not a complete object from this brace; try the previous one.
    }
    // lastIndexOf clamps a negative fromIndex to 0, so stop explicitly.
    if (open === 0) break;
    open = stdout.lastIndexOf("{", open - 1);
  }
  return null;
}
/**
 * Turn the first `[ ]` on the given line into `[x]`.
 *
 * Pure: works on the text only and returns the new text.
 *
 * @param {string} content - Full file text.
 * @param {number} bulletIndex - 0-based index into `content.split("\n")`.
 * @returns {string} Updated text, or `content` itself when the index is
 *   negative or past the last line.
 */
export function tickBulletInContent(content, bulletIndex) {
  const rows = content.split("\n");
  if (bulletIndex < 0 || bulletIndex >= rows.length) return content;

  // Only the first checkbox on the line is ticked.
  const tickedRow = rows[bulletIndex].replace(/\[\s\]/, "[x]");
  rows.splice(bulletIndex, 1, tickedRow);
  return rows.join("\n");
}
/**
 * Count the `[ ]` checkbox lines that sit under a `## Tasks` heading.
 *
 * Section tracking is line-based: any line starting with `## ` switches
 * sections, and only lines following a `## Tasks` heading (case-insensitive)
 * are counted.
 *
 * @param {string} content - Full story file text.
 * @returns {number} Number of unchecked task lines.
 */
export function countUncheckedBullets(content) {
  let insideTasks = false;
  return content.split("\n").reduce((total, text) => {
    if (text.startsWith("## ")) {
      insideTasks = /^##\s+Tasks\b/i.test(text);
      return total;
    }
    const isOpenTask = insideTasks && /^[\s>-]*\[\s\]/.test(text);
    return isOpenTask ? total + 1 : total;
  }, 0);
}
/**
 * Rewrite the `status:` line inside the leading `---\n...\n---` frontmatter
 * block.
 *
 * @param {string} content - Full file text.
 * @param {string} newStatus - Value to write after `status: `.
 * @returns {string} Updated text, or `content` unchanged when there is no
 *   leading frontmatter block or it has no `status:` key.
 */
export function setFrontmatterStatus(content, newStatus) {
  const fmMatch = content.match(/^(---\n)([\s\S]+?)(\n---)/);
  if (!fmMatch) return content;
  const [full, openDelim, body, closeDelim] = fmMatch;

  // Only horizontal whitespace may follow the colon: `\s*` would cross the
  // newline of an empty `status:` line and swallow the next key.
  const statusLine = /^status:[ \t]*.*$/m;
  if (!statusLine.test(body)) return content;

  // Function replacer + slicing keep `$`-sequences (`$$`, `$&`, ...) in the
  // new status or in the rest of the frontmatter literal; a string
  // replacement argument would interpret them as substitution patterns.
  const newBody = body.replace(statusLine, () => `status: ${newStatus}`);
  // The frontmatter regex is anchored at index 0, so `full` is a prefix.
  return `${openDelim}${newBody}${closeDelim}${content.slice(full.length)}`;
}
/**
 * Read the `status:` value from the leading `---\n...\n---` frontmatter block.
 *
 * @param {string} content - Full file text.
 * @returns {string|null} The trimmed value (an empty string when the key is
 *   present but blank), or null when there is no frontmatter or no `status:`
 *   key inside it.
 */
export function readFrontmatterStatus(content) {
  const fmMatch = content.match(/^---\n([\s\S]+?)\n---/);
  if (!fmMatch) return null;

  // Only horizontal whitespace may follow the colon: `\s*` would cross the
  // newline of an empty `status:` line and report the NEXT line as the value.
  const statusMatch = fmMatch[1].match(/^status:[ \t]*(.*)$/m);
  return statusMatch ? statusMatch[1].trim() : null;
}
/**
 * Tick the story's checkbox in the parent epic's `## Stories` section.
 *
 * The story line is the first line under `## Stories` that contains a
 * markdown link target of `(<storyId>/_story.md)` or `(./<storyId>/_story.md)`.
 * The epic file (`<workRoot>/epics/<epicId>/_epic.md`) is rewritten only when
 * that line still carries an unchecked `[ ]`.
 *
 * @param {string} workRoot - Work-tracking root directory.
 * @param {string} epicId - Epic folder name.
 * @param {string} storyId - Story folder name.
 * @returns {boolean} true when the file was modified; false when the epic
 *   file is missing, the story line is not found, or it is already ticked.
 */
export function tickStoryBulletInEpic(workRoot, epicId, storyId) {
  const epicFile = path.join(workRoot, "epics", epicId, "_epic.md");
  if (!fs.existsSync(epicFile)) return false;

  const rows = fs.readFileSync(epicFile, "utf8").split("\n");
  const linkTargets = [`(${storyId}/_story.md)`, `(./${storyId}/_story.md)`];

  let insideStories = false;
  const storyRow = rows.findIndex((row) => {
    if (row.startsWith("## ")) {
      insideStories = /^##\s+Stories\b/i.test(row);
      return false;
    }
    return insideStories && linkTargets.some((target) => row.includes(target));
  });

  // Only the first matching line is considered, ticked or not.
  if (storyRow === -1 || !/\[\s\]/.test(rows[storyRow])) return false;

  rows[storyRow] = rows[storyRow].replace(/\[\s\]/, "[x]");
  fs.writeFileSync(epicFile, rows.join("\n"));
  return true;
}
/**
 * Mark an epic `status: done` once none of its stories is unfinished.
 *
 * A story is any direct sub-directory of `<workRoot>/epics/<epicId>/` that
 * contains a `_story.md`; other entries are ignored. The epic file is written
 * with `setFrontmatterStatus(..., "done")` when every such story reads
 * `status: done`.
 *
 * @param {string} workRoot - Work-tracking root directory.
 * @param {string} epicId - Epic folder name.
 * @returns {boolean} true when the epic file was written; false when the epic
 *   file is missing, is already `done`, or some story is not `done`.
 */
export function flipEpicDoneIfAllStoriesDone(workRoot, epicId) {
  const epicDir = path.join(workRoot, "epics", epicId);
  const epicFile = path.join(epicDir, "_epic.md");
  if (!fs.existsSync(epicFile)) return false;

  const epicContent = fs.readFileSync(epicFile, "utf8");
  if (readFrontmatterStatus(epicContent) === "done") return false;

  const hasUnfinishedStory = fs.readdirSync(epicDir).some((entry) => {
    const entryPath = path.join(epicDir, entry);
    if (!fs.statSync(entryPath).isDirectory()) return false;
    const storyFile = path.join(entryPath, "_story.md");
    if (!fs.existsSync(storyFile)) return false;
    return readFrontmatterStatus(fs.readFileSync(storyFile, "utf8")) !== "done";
  });
  if (hasUnfinishedStory) return false;

  fs.writeFileSync(epicFile, setFrontmatterStatus(epicContent, "done"));
  return true;
}
/**
 * Decide how the sandcastle sandbox should authenticate.
 *
 * Resolution order:
 *  1. Subscription — the host creds directory exists. The directory is
 *     `env.SANDCASTLE_CLAUDE_CREDS_DIR` when set, otherwise `<home>/.claude`.
 *  2. API key — `ANTHROPIC_API_KEY` if set, otherwise `OPENAI_API_KEY`; only
 *     the chosen key is passed along.
 *  3. Missing — none of the above.
 *
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment to inspect.
 * @param {string} [options.home=os.homedir()] - Home directory used for the
 *   default creds path.
 * @returns {{ mode: "subscription", hostPath: string, sandboxPath: string }
 *   | { mode: "api-key", env: object }
 *   | { mode: "missing" }}
 */
export function resolveClaudeAuth({
  env = process.env,
  home = os.homedir(),
} = {}) {
  const credsHostPath =
    env.SANDCASTLE_CLAUDE_CREDS_DIR ?? path.join(home, ".claude");
  if (fs.existsSync(credsHostPath)) {
    // The sandbox-side path is left as a tilde path; resolving it to the
    // sandbox user's home is not done in this function.
    return {
      mode: "subscription",
      hostPath: credsHostPath,
      sandboxPath: "~/.claude",
    };
  }

  // API-key fallback, in priority order.
  for (const keyName of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
    if (env[keyName]) {
      return { mode: "api-key", env: { [keyName]: env[keyName] } };
    }
  }

  return { mode: "missing" };
}
/**
 * Dry-run mode: print what would be dispatched, plus usage hints, without
 * invoking sandcastle. Exits the process with code 0 when no task is ready.
 */
function printPlan() {
  const task = findNextTask();
  if (!task) {
    console.log("No ready task to dispatch.");
    console.log("Run `pnpm work blocked` to see what's waiting on what.");
    process.exit(0);
  }

  const say = (lines) => {
    for (const line of lines) console.log(line);
  };

  say([
    "=== Dispatch plan ===",
    ` Epic: ${task.epic}`,
    ` Story: ${task.story}${task.title}`,
    ` Bullet: ${task.bulletLine.trim()}`,
    ` Prompt: .sandcastle/implementer.prompt.md`,
  ]);
  console.log();
  say([
    "To execute this dispatch:",
    " - With Claude subscription: `claude login` (one-time) then `pnpm work dispatch --execute`",
    " - With API key: `ANTHROPIC_API_KEY=... pnpm work dispatch --execute`",
  ]);
  console.log();
  say([
    "By default --execute LOOPS through every ready task. Flags to bound it:",
    " --once stop after one approved slice",
    " --max-tasks N stop after N approved slices",
  ]);
  console.log();
  console.log(
    "(Execute mode requires @ai-hero/sandcastle, a sandbox provider, and auth — see above.)",
  );
}
/**
 * Report a failed sandcastle run on stderr, followed by setup hints chosen
 * from the error message. Shared by the implementer and reviewer error paths.
 *
 * Hints:
 *  - missing docker image → the one-time build-image command;
 *  - "not logged in" on macOS → how to export Keychain credentials, or use an
 *    API key instead;
 *  - always → a pointer to the runbook.
 *
 * @param {string} stage - Label for the failing stage (e.g. "Implementer").
 * @param {{ message?: string }} e - The caught error.
 */
function explainSandcastleError(stage, e) {
  console.error(`${stage} dispatch failed:`, e.message);

  const message = e.message ?? "";
  const imageMissing = /Image '.+' not found locally/.test(message);
  const notLoggedIn = /Not logged in|Please run \/login/.test(message);

  if (imageMissing) {
    console.error(" One-time setup: pnpm exec sandcastle docker build-image");
  }

  if (notLoggedIn && process.platform === "darwin") {
    const macHints = [
      " macOS users: Claude Code stores credentials in the Keychain, not in ~/.claude/. Extract once:",
      ` security find-generic-password -s "Claude Code-credentials" -a "$USER" -w > ~/.claude/.credentials.json`,
      " chmod 600 ~/.claude/.credentials.json",
      " OR fall back to API key: export ANTHROPIC_API_KEY=sk-ant-...",
    ];
    for (const hint of macHints) console.error(hint);
  }

  console.error(" See docs/guides/runbook.md → 'Using Sandcastle' for setup.");
}
/**
 * Read a positive-integer tuning knob from the environment.
 *
 * Unset or blank values silently use the fallback; anything that is not an
 * integer >= 1 is reported on stderr and also replaced by the fallback, so a
 * typo cannot turn into NaN/0 and silently disable the retry loop.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset/invalid.
 * @returns {number} A positive integer.
 */
function readPositiveIntEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  if (Number.isInteger(parsed) && parsed >= 1) return parsed;
  console.error(
    `Ignoring invalid ${name}=${JSON.stringify(raw)}; using default ${fallback}.`,
  );
  return fallback;
}

/**
 * Run one slice end-to-end: implementer, then reviewer, re-dispatching the
 * implementer with the reviewer's notes on each reject until the attempt
 * budget is spent. Every implementer/reviewer run is a separate
 * `sandcastleRoot.run` call.
 *
 * Tunables (environment, positive integers):
 *   SANDCASTLE_MAX_ATTEMPTS            implementer+reviewer rounds (default 3)
 *   SANDCASTLE_IMPLEMENTER_ITERATIONS  maxIterations for the implementer (30)
 *   SANDCASTLE_REVIEWER_ITERATIONS     maxIterations for the reviewer (10)
 *
 * @param {object} params
 * @param {object} params.sandcastleRoot - Sandcastle module; only `run` is used.
 * @param {object} params.sandbox - Sandbox provider instance passed to `run`.
 * @param {object} params.agent - Agent instance passed to `run`.
 * @param {object} params.next - Task descriptor as built by `findNextTask`.
 * @returns {Promise<object>} One of:
 *   `{ outcome: "approved", attempts, implJson, reviewJson }`
 *   `{ outcome: "rejected-final", attempts, lastRejectNotes }`
 *   `{ outcome: "blocked", attempts, implJson }`
 *   `{ outcome: "error", attempts, reason }`
 */
async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
  const maxAttempts = readPositiveIntEnv("SANDCASTLE_MAX_ATTEMPTS", 3);
  // Implementer runs a full TDD slice (read context, red test, green impl,
  // run the gates, commit), hence the larger default iteration budget.
  const implementerIterations = readPositiveIntEnv(
    "SANDCASTLE_IMPLEMENTER_ITERATIONS",
    30,
  );
  // Reviewer only reads the diff + task spec and decides approve/reject.
  const reviewerIterations = readPositiveIntEnv(
    "SANDCASTLE_REVIEWER_ITERATIONS",
    10,
  );
  const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md");
  const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md");

  // Notes from the most recent reject; null until the first reject.
  let rejectionNotes = null;
  let attempts = 0;

  while (attempts < maxAttempts) {
    attempts++;
    const taskSpec = buildTaskSpec(next, rejectionNotes);

    let implResult;
    try {
      implResult = await sandcastleRoot.run({
        agent,
        sandbox,
        promptFile: implementerPrompt,
        promptArgs: { TASK_FILE_CONTENT: taskSpec },
        cwd: REPO_ROOT,
        maxIterations: implementerIterations,
        // Stop iterating the moment the agent emits this marker. Without
        // it, sandcastle re-invokes the model up to maxIterations even
        // when the work is already done.
        completionSignal: "<promise>COMPLETE</promise>",
      });
    } catch (e) {
      explainSandcastleError("Implementer", e);
      return { outcome: "error", attempts, reason: e.message };
    }
    console.log(
      `Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`,
    );

    const implJson = parseAgentJson(implResult.stdout);
    if (
      implJson?.status === "blocked" ||
      implJson?.status === "needs-clarification"
    ) {
      return { outcome: "blocked", attempts, implJson };
    }

    // Argument-array form: the branch name never passes through a shell, so
    // unusual characters in it cannot be interpreted as shell syntax.
    let diff;
    try {
      diff = execFileSync("git", ["diff", `main..${implResult.branch}`], {
        encoding: "utf8",
        cwd: REPO_ROOT,
      });
    } catch (e) {
      // Best effort: the reviewer still runs, but say why it has no diff.
      console.error(
        `Could not compute diff for ${implResult.branch}: ${e.message}`,
      );
      diff = "(diff unavailable)";
    }

    let reviewResult;
    try {
      reviewResult = await sandcastleRoot.run({
        agent,
        sandbox,
        promptFile: reviewerPrompt,
        promptArgs: { TASK_FILE_CONTENT: taskSpec, DIFF: diff },
        cwd: REPO_ROOT,
        maxIterations: reviewerIterations,
        // Same early-stop marker as the implementer run.
        completionSignal: "<promise>COMPLETE</promise>",
      });
    } catch (e) {
      explainSandcastleError("Reviewer", e);
      return { outcome: "error", attempts, reason: e.message };
    }

    const reviewJson = parseAgentJson(reviewResult.stdout);
    if (reviewJson?.decision === "approve") {
      return { outcome: "approved", attempts, implJson, reviewJson };
    }
    if (reviewJson?.decision === "reject") {
      rejectionNotes =
        reviewJson.notes ?? "(reviewer rejected without notes — re-attempt)";
      console.log(
        `↺ Attempt ${attempts}/${maxAttempts} rejected. Re-dispatching implementer with notes.`,
      );
      continue;
    }
    return {
      outcome: "error",
      attempts,
      reason: `reviewer returned no parseable decision; stdout:\n${reviewResult.stdout}`,
    };
  }

  // Attempt budget exhausted: every round ended in a reject.
  return {
    outcome: "rejected-final",
    attempts,
    lastRejectNotes: rejectionNotes,
  };
}
/**
 * Bookkeeping after an approved slice, committed on the host as its own
 * `chore(work): ...` commit:
 *
 *  1. tick the task bullet in the story file;
 *  2. set the story to `done` when no unchecked task remains, or move it from
 *     `todo` to `in-progress` otherwise;
 *  3. if the story just finished, tick it in the epic and flip the epic to
 *     `done` when all its stories are done;
 *  4. `git add` the touched files and `git commit`.
 *
 * @param {object} next - Task descriptor as built by `findNextTask`; reads
 *   `storyPath`, `bulletIndex`, `epic` and `story`.
 */
function applyApprovedState(next) {
  const ticked = tickBulletInContent(
    fs.readFileSync(next.storyPath, "utf8"),
    next.bulletIndex,
  );
  const statusBefore = readFrontmatterStatus(ticked);
  const storyFinished =
    countUncheckedBullets(ticked) === 0 && statusBefore !== "done";

  let updated = ticked;
  if (storyFinished) {
    updated = setFrontmatterStatus(ticked, "done");
  } else if (statusBefore === "todo") {
    updated = setFrontmatterStatus(ticked, "in-progress");
  }
  fs.writeFileSync(next.storyPath, updated);

  // Epic-level updates only happen on the tick that finishes the story.
  const epicBulletTicked =
    storyFinished && tickStoryBulletInEpic(WORK_ROOT, next.epic, next.story);
  const epicFinished =
    storyFinished && flipEpicDoneIfAllStoriesDone(WORK_ROOT, next.epic);

  const filesToStage = [path.relative(REPO_ROOT, next.storyPath)];
  if (epicFinished || epicBulletTicked) {
    const epicFile = path.join(WORK_ROOT, "epics", next.epic, "_epic.md");
    filesToStage.push(path.relative(REPO_ROOT, epicFile));
  }

  let commitMsg;
  if (epicFinished) {
    commitMsg = `chore(work): finish epic ${next.epic}`;
  } else if (storyFinished) {
    commitMsg = `chore(work): finish ${next.story}`;
  } else {
    commitMsg = `chore(work): tick task in ${next.story}`;
  }

  execFileSync("git", ["add", ...filesToStage], { cwd: REPO_ROOT });
  execFileSync("git", ["commit", "-m", commitMsg], {
    cwd: REPO_ROOT,
    stdio: "inherit",
  });
  console.log(commitMsg);
}
/**
 * The `--execute` loop: resolve auth, load sandcastle, then repeatedly pick
 * the next ready task, run it through implementer + reviewer, and record the
 * result on approve.
 *
 * The loop ends normally when no ready task remains or the approved-slice cap
 * is reached. Any non-approved outcome (final reject, blocked, error) prints
 * the details and exits the process with code 1, as do missing auth and a
 * missing sandcastle package.
 *
 * @param {object} params
 * @param {number|null} params.maxTasks - Stop after this many approved
 *   slices; null means no cap.
 */
async function executeDispatch({ maxTasks }) {
  const auth = resolveClaudeAuth();
  if (auth.mode === "missing") {
    const help = [
      "✗ --execute requires either:",
      " 1. Claude Code logged in on host (run `claude login` first; ~/.claude/ becomes the auth source — this is the recommended path for Pro/Max subscribers)",
      " 2. ANTHROPIC_API_KEY or OPENAI_API_KEY in env (fallback)",
      "",
      " Override Claude creds path via SANDCASTLE_CLAUDE_CREDS_DIR.",
    ];
    for (const line of help) console.error(line);
    process.exit(1);
  }

  const authLabel =
    auth.mode === "subscription"
      ? `subscription (mounting ${auth.hostPath})`
      : "api-key";
  console.log(`Auth mode: ${authLabel}`);

  // Loaded lazily so plan-only mode never needs sandcastle installed.
  let sandcastleRoot;
  let dockerProvider;
  try {
    sandcastleRoot = await import("@ai-hero/sandcastle");
    ({ docker: dockerProvider } = await import(
      "@ai-hero/sandcastle/sandboxes/docker"
    ));
  } catch {
    console.error(
      "✗ @ai-hero/sandcastle is not installed. Run `pnpm install` first.",
    );
    process.exit(1);
  }

  // Subscription auth mounts the host creds dir into the sandbox; API-key
  // auth passes the key through the agent's environment instead.
  const dockerOpts =
    auth.mode === "subscription"
      ? {
          mounts: [
            {
              hostPath: auth.hostPath,
              sandboxPath: auth.sandboxPath,
              readonly: false,
            },
          ],
        }
      : {};
  const agentOpts = auth.mode === "api-key" ? { env: auth.env } : {};

  const sandbox = dockerProvider(dockerOpts);
  const agent = sandcastleRoot.claudeCode("claude-sonnet-4-6", agentOpts);

  let approvedCount = 0;
  for (;;) {
    if (maxTasks !== null && approvedCount >= maxTasks) {
      console.log(`\nHit --max-tasks=${maxTasks} cap; stopping.`);
      break;
    }

    const next = findNextTask();
    if (!next) {
      console.log("\nNo more ready tasks. Dispatch loop complete.");
      break;
    }

    console.log(
      `\n--- Slice ${approvedCount + 1}: ${next.epic} / ${next.story} ---`,
    );
    console.log(` Bullet: ${next.bulletLine.trim()}`);

    const result = await runOneSlice({ sandcastleRoot, sandbox, agent, next });
    const { outcome } = result;

    if (outcome === "approved") {
      applyApprovedState(next);
      approvedCount++;
      continue;
    }

    if (outcome === "rejected-final") {
      console.error(
        `\n✗ Slice rejected after ${result.attempts} attempts. Stopping dispatch loop.`,
      );
      if (result.lastRejectNotes) {
        console.error(`Last rejection notes:\n${result.lastRejectNotes}`);
      }
      process.exit(1);
    }

    if (outcome === "blocked") {
      const reported = result.implJson?.status ?? "blocked";
      console.error(
        `\n✗ Implementer reported ${reported}. Stopping dispatch loop.`,
      );
      if (result.implJson?.notes) {
        console.error(`Implementer notes:\n${result.implJson.notes}`);
      }
      process.exit(1);
    }

    // Anything else is treated as an error outcome.
    console.error(`\n✗ Slice errored: ${result.reason ?? "(no reason)"}`);
    process.exit(1);
  }

  console.log(`\nDispatched ${approvedCount} slice(s).`);
}
/**
 * Explicit CLI entry. Exported so cli.mjs can dispatch into this module
 * without relying on a top-level side effect.
 *
 * Flags:
 *   --execute       run sandcastle (default: print the plan only)
 *   --once          stop after one approved slice (default: loop until done)
 *   --max-tasks N   stop after N approved slices; `--max-tasks=N` is accepted
 *                   too. Overrides --once when both are given.
 *
 * An invalid --max-tasks value (missing, non-integer, or < 1) prints an error
 * and exits the process with code 2.
 *
 * @param {string[]} args - Command-line arguments after the script name.
 * @returns {Promise<void>}
 */
export async function runCli(args) {
  if (!args.includes("--execute")) {
    printPlan();
    return;
  }

  let maxTasks = args.includes("--once") ? 1 : null;

  const flagName = "--max-tasks";
  const flagIdx = args.findIndex(
    (arg) => arg === flagName || arg.startsWith(`${flagName}=`),
  );
  if (flagIdx !== -1) {
    const flag = args[flagIdx];
    const raw =
      flag === flagName ? args[flagIdx + 1] : flag.slice(flagName.length + 1);
    const parsed = Number(raw);
    // isInteger (not isFinite): the cap counts whole slices, so 2.5 is a typo.
    if (!Number.isInteger(parsed) || parsed < 1) {
      console.error(`✗ --max-tasks expects a positive integer, got: ${raw}`);
      process.exit(2);
    }
    maxTasks = parsed;
  }

  await executeDispatch({ maxTasks });
}
// When invoked directly (`node scripts/work/dispatch.mjs ...`), run the CLI.
// When imported by cli.mjs or any sibling, do nothing — the caller decides.
//
// The check compares real filesystem paths rather than building a `file://`
// string by hand: import.meta.url is percent-encoded and platform-specific,
// so a hand-built URL stops matching for paths containing spaces or other
// escaped characters, on Windows, or when the script is reached via a symlink.
const invokedDirectly = (() => {
  const entryScript = process.argv[1];
  if (!entryScript) return false;
  try {
    return (
      fs.realpathSync(entryScript) ===
      fs.realpathSync(fileURLToPath(import.meta.url))
    );
  } catch {
    // Entry path cannot be resolved — treat as "imported".
    return false;
  }
})();
if (invokedDirectly) {
  // Surface failures explicitly instead of leaving the promise floating.
  runCli(process.argv.slice(2)).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}