agentic-dev/scripts/work/dispatch.mjs

#!/usr/bin/env node
/**
 * pnpm work dispatch — orchestrator that picks the next ready task and
 * (with --execute) invokes sandcastle to run the implementer then reviewer.
 *
 * Default mode prints the dispatch plan without invoking sandcastle —
 * safe to run anywhere. --execute requires EITHER:
 *   1. Claude Code logged in on host (~/.claude/ — recommended for subscribers)
 *   2. ANTHROPIC_API_KEY or OPENAI_API_KEY in env (fallback)
 */
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { execSync, execFileSync } from "node:child_process";
import { buildState } from "./state-builder.mjs";

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Two directory levels above this module's own directory.
const REPO_ROOT = path.resolve(__dirname, "../..");
// Root of the work-tracking tree that story/epic files are read from.
const WORK_ROOT = path.join(REPO_ROOT, "docs/work");
// Directory holding the sandcastle prompt templates.
const SANDCASTLE_DIR = path.join(REPO_ROOT, ".sandcastle");

/**
 * Locate the next dispatchable unit of work: the first unchecked `## Tasks`
 * bullet of the first story that `buildState` reports as ready.
 *
 * @param {string} [workRoot] directory containing `epics/<epic>/<story>/_story.md`
 * @returns {{ epic, story, title, storyPath, storyContent, bulletLine, bulletIndex } | null}
 *   null when no story is ready, the story file does not exist, or the story
 *   has no unchecked bullet left.
 */
export function findNextTask(workRoot = WORK_ROOT) {
  const { ready } = buildState(workRoot);
  if (ready.length === 0) return null;

  const { epic, story, title } = ready[0];
  const storyPath = path.join(workRoot, "epics", epic, story, "_story.md");
  if (!fs.existsSync(storyPath)) return null;

  const storyContent = fs.readFileSync(storyPath, "utf8");
  const firstOpen = findFirstUncheckedBullet(storyContent);
  if (firstOpen.bulletLine === null) return null;

  return {
    epic,
    story,
    title,
    storyPath,
    storyContent,
    bulletLine: firstOpen.bulletLine,
    bulletIndex: firstOpen.bulletIndex,
  };
}

/**
 * Find the first unchecked checkbox (`[ ]`) that appears under the
 * `## Tasks` heading. Any other `## ` heading closes the section.
 *
 * @param {string} content full story text
 * @returns {{ bulletLine: string | null, bulletIndex: number }} the matching
 *   line and its 0-based position in `content.split("\n")`, or
 *   `{ bulletLine: null, bulletIndex: -1 }` when there is none.
 */
export function findFirstUncheckedBullet(content) {
  let insideTasks = false;
  for (const [index, text] of content.split("\n").entries()) {
    if (text.startsWith("## ")) {
      // Every level-2 heading either opens or closes the Tasks section.
      insideTasks = /^##\s+Tasks\b/i.test(text);
    } else if (insideTasks && /^[\s>-]*\[\s\]/.test(text)) {
      return { bulletLine: text, bulletIndex: index };
    }
  }
  return { bulletLine: null, bulletIndex: -1 };
}

/**
 * Render the markdown task spec that is handed to sandcastle as
 * TASK_FILE_CONTENT.
 *
 * @param {{ epic, story, title, bulletLine: string, storyContent }} next
 *   task descriptor; `bulletLine` is trimmed before being embedded.
 * @param {string | null} [rejectionNotes] when truthy, a trailing
 *   "Previous attempt was REJECTED" section carrying these notes is appended.
 * @returns {string} the spec text
 */
export function buildTaskSpec(next, rejectionNotes = null) {
  // Each entry is one output line; template literals keep the same string
  // coercion as direct interpolation.
  const sections = [
    "# Current task",
    "",
    "## Epic",
    `${next.epic}`,
    "",
    "## Story",
    `${next.story} — ${next.title}`,
    "",
    "## Current bullet",
    `${next.bulletLine.trim()}`,
    "",
    "## Full story for context",
    "",
    `${next.storyContent}`,
  ];
  if (rejectionNotes) {
    sections.push(
      "",
      "## Previous attempt was REJECTED — fix these before re-committing",
      "",
      `${rejectionNotes}`,
    );
  }
  return sections.join("\n");
}

/**
 * Extract the LAST structured JSON object emitted by the agent.
 *
 * Two formats are accepted, tried in this order:
 *   1. The last ```json fenced block in the output.
 *   2. A bare `{ ... }` object that ends at the last `}` in the output
 *      (anything after that brace, e.g. a completion marker, is ignored).
 *
 * @param {string} stdout raw agent output
 * @returns {any | null} the parsed value, or null when nothing parses
 */
export function parseAgentJson(stdout) {
  if (!stdout) return null;

  // 1. Code-fenced JSON: take the LAST ```json ... ``` block.
  const fenceMatches = [...stdout.matchAll(/```json\s*\n([\s\S]*?)\n\s*```/g)];
  if (fenceMatches.length > 0) {
    const inner = fenceMatches[fenceMatches.length - 1][1].trim();
    try {
      return JSON.parse(inner);
    } catch {
      // fall through to bare-brace fallback
    }
  }

  // 2. Bare braces. Counting braces to find the opener is wrong whenever a
  // string value itself contains "{" or "}" (reviewer notes often quote
  // code), so instead try every "{" from right to left as the opener and let
  // JSON.parse decide. An opener inside a nested object or a string leaves
  // trailing characters and is rejected, so the first success is the
  // complete outermost object ending at the last "}".
  const lastClose = stdout.lastIndexOf("}");
  if (lastClose === -1) return null;
  const upToClose = stdout.slice(0, lastClose + 1);
  let open = upToClose.lastIndexOf("{");
  while (open !== -1) {
    try {
      return JSON.parse(upToClose.slice(open));
    } catch {
      // not a complete object from this position; try an earlier opener
    }
    // lastIndexOf clamps a negative fromIndex to 0, so stop explicitly.
    open = open === 0 ? -1 : upToClose.lastIndexOf("{", open - 1);
  }
  return null;
}

/**
 * Turn the first `[ ]` on the line at `bulletIndex` into `[x]`.
 * Pure: works on the text and returns the new text.
 *
 * @param {string} content full file text
 * @param {number} bulletIndex 0-based index into `content.split("\n")`
 * @returns {string} updated text; `content` itself when the index is out of range
 */
export function tickBulletInContent(content, bulletIndex) {
  const rows = content.split("\n");
  if (bulletIndex < 0 || bulletIndex >= rows.length) return content;
  const tickedRow = rows[bulletIndex].replace(/\[\s\]/, "[x]");
  rows[bulletIndex] = tickedRow;
  return rows.join("\n");
}

/**
 * Count the unchecked checkboxes (`[ ]`) under the `## Tasks` heading.
 * Any other `## ` heading closes the section.
 *
 * @param {string} content full story text
 * @returns {number} number of unchecked bullets in the Tasks section
 */
export function countUncheckedBullets(content) {
  let insideTasks = false;
  return content.split("\n").reduce((total, text) => {
    if (text.startsWith("## ")) {
      // Heading lines only toggle the section; they are never counted.
      insideTasks = /^##\s+Tasks\b/i.test(text);
      return total;
    }
    const isOpenBullet = insideTasks && /^[\s>-]*\[\s\]/.test(text);
    return isOpenBullet ? total + 1 : total;
  }, 0);
}

/**
 * Rewrite the `status:` line inside the leading `---\n...\n---` frontmatter
 * block.
 *
 * @param {string} content full file text
 * @param {string} newStatus value to write after `status: `
 * @returns {string} the updated text, or `content` unchanged when there is
 *   no leading frontmatter or it has no `status:` key.
 */
export function setFrontmatterStatus(content, newStatus) {
  const fmMatch = content.match(/^(---\n)([\s\S]+?)(\n---)/);
  if (!fmMatch) return content;
  const [full, openDelim, body, closeDelim] = fmMatch;

  // `[ \t]*` rather than `\s*`: `\s` also matches "\n", so an empty
  // `status:` line would swallow (and delete) the following key.
  const statusLine = /^status:[ \t]*.*$/m;
  if (!statusLine.test(body)) return content;

  // Function replacer so "$"-sequences in newStatus are taken literally.
  const newBody = body.replace(statusLine, () => `status: ${newStatus}`);

  // The match is anchored at index 0, so splice by position instead of a
  // second String.replace, which would reinterpret `$$`/`$&` found in the
  // frontmatter text itself.
  return `${openDelim}${newBody}${closeDelim}${content.slice(full.length)}`;
}

/**
 * Read the `status:` value from the leading `---\n...\n---` frontmatter.
 *
 * @param {string} content full file text
 * @returns {string | null} the trimmed value (an empty string when the key
 *   is present but blank), or null when there is no frontmatter or no
 *   `status:` key.
 */
export function readFrontmatterStatus(content) {
  const fmMatch = content.match(/^---\n([\s\S]+?)\n---/);
  if (!fmMatch) return null;
  // `[ \t]*` rather than `\s*`: `\s` also matches "\n", which made a blank
  // `status:` line report the NEXT line's text as the status.
  const statusMatch = fmMatch[1].match(/^status:[ \t]*(.*)$/m);
  return statusMatch ? statusMatch[1].trim() : null;
}

/**
 * Tick the checkbox on the epic's `## Stories` line that links to the given
 * story folder (`(<storyId>/_story.md)` or `(./<storyId>/_story.md)`).
 * Only the first matching line inside the section is considered.
 *
 * @param {string} workRoot directory containing `epics/`
 * @param {string} epicId epic folder name
 * @param {string} storyId story folder name
 * @returns {boolean} true when `_epic.md` was rewritten; false when the epic
 *   file is missing, no line links to the story, or that line has no
 *   unchecked box.
 */
export function tickStoryBulletInEpic(workRoot, epicId, storyId) {
  const epicFile = path.join(workRoot, "epics", epicId, "_epic.md");
  if (!fs.existsSync(epicFile)) return false;

  const rows = fs.readFileSync(epicFile, "utf8").split("\n");
  const linkForms = [`(${storyId}/_story.md)`, `(./${storyId}/_story.md)`];

  let insideStories = false;
  const target = rows.findIndex((row) => {
    if (row.startsWith("## ")) {
      // Headings only toggle the section; they are never a candidate line.
      insideStories = /^##\s+Stories\b/i.test(row);
      return false;
    }
    return insideStories && linkForms.some((form) => row.includes(form));
  });

  if (target === -1 || !/\[\s\]/.test(rows[target])) return false;
  rows[target] = rows[target].replace(/\[\s\]/, "[x]");
  fs.writeFileSync(epicFile, rows.join("\n"));
  return true;
}

/**
 * If every story folder in an epic has `status: done`, flip the epic's own
 * frontmatter to `status: done`.
 *
 * A "story folder" is any sub-directory of the epic directory that contains
 * a `_story.md`; other entries are ignored.
 *
 * @param {string} workRoot directory containing `epics/`
 * @param {string} epicId epic folder name
 * @returns {boolean} true only when `_epic.md` was actually changed; false
 *   when the epic file is missing, is already done, has a story that is not
 *   done, or has no `status:` key to flip.
 */
export function flipEpicDoneIfAllStoriesDone(workRoot, epicId) {
  const epicDir = path.join(workRoot, "epics", epicId);
  const epicFile = path.join(epicDir, "_epic.md");
  if (!fs.existsSync(epicFile)) return false;
  const epicContent = fs.readFileSync(epicFile, "utf8");
  if (readFrontmatterStatus(epicContent) === "done") return false;

  for (const sub of fs.readdirSync(epicDir)) {
    const subPath = path.join(epicDir, sub);
    if (!fs.statSync(subPath).isDirectory()) continue;
    const storyFile = path.join(subPath, "_story.md");
    if (!fs.existsSync(storyFile)) continue;
    const storyStatus = readFrontmatterStatus(
      fs.readFileSync(storyFile, "utf8"),
    );
    if (storyStatus !== "done") return false;
  }

  // setFrontmatterStatus returns its input untouched when the epic has no
  // frontmatter or no `status:` key. Reporting a flip in that case would be
  // wrong: nothing changed on disk, yet callers would treat the epic as
  // finished.
  const updated = setFrontmatterStatus(epicContent, "done");
  if (updated === epicContent) return false;
  fs.writeFileSync(epicFile, updated);
  return true;
}

/**
 * Decide how the sandcastle sandbox should authenticate.
 *
 * Checked in order:
 *   1. Subscription — the host's Claude creds directory exists. The path is
 *      SANDCASTLE_CLAUDE_CREDS_DIR when set, otherwise `<home>/.claude`.
 *   2. API key — ANTHROPIC_API_KEY, then OPENAI_API_KEY; only the first one
 *      found is passed through.
 *   3. Nothing usable — the caller is expected to report the error.
 *
 * @param {{ env?: object, home?: string }} [options] injectable environment
 *   and home directory (default: `process.env`, `os.homedir()`)
 * @returns {{ mode: "subscription", hostPath: string, sandboxPath: string }
 *         | { mode: "api-key", env: object }
 *         | { mode: "missing" }}
 */
export function resolveClaudeAuth({
  env = process.env,
  home = os.homedir(),
} = {}) {
  const hostPath =
    env.SANDCASTLE_CLAUDE_CREDS_DIR ?? path.join(home, ".claude");
  if (fs.existsSync(hostPath)) {
    // sandboxPath stays a literal "~/.claude"; the original comment states
    // that MountConfig's tilde expansion resolves it inside the sandbox.
    return { mode: "subscription", hostPath, sandboxPath: "~/.claude" };
  }

  // Fallback keys in priority order; the first truthy one wins.
  for (const keyName of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
    if (env[keyName]) {
      return { mode: "api-key", env: { [keyName]: env[keyName] } };
    }
  }

  return { mode: "missing" };
}

/**
 * Print what `--execute` would dispatch next, plus usage hints, without
 * invoking sandcastle. Exits the process with code 0 when nothing is ready.
 */
function printPlan() {
  const next = findNextTask();
  if (!next) {
    console.log("No ready task to dispatch.");
    console.log("Run `pnpm work blocked` to see what's waiting on what.");
    process.exit(0);
  }

  // One entry per output line; "" renders as a blank line.
  const report = [
    "=== Dispatch plan ===",
    `  Epic:     ${next.epic}`,
    `  Story:    ${next.story} — ${next.title}`,
    `  Bullet:   ${next.bulletLine.trim()}`,
    `  Prompt:   .sandcastle/implementer.prompt.md`,
    "",
    "To execute this dispatch:",
    "  - With Claude subscription: `claude login` (one-time) then `pnpm work dispatch --execute`",
    "  - With API key:             `ANTHROPIC_API_KEY=... pnpm work dispatch --execute`",
    "",
    "By default --execute LOOPS through every ready task. Flags to bound it:",
    "  --once             stop after one approved slice",
    "  --max-tasks N      stop after N approved slices",
    "",
    "(Execute mode requires @ai-hero/sandcastle, a sandbox provider, and auth — see above.)",
  ];
  for (const row of report) console.log(row);
}

/**
 * Report a failed sandcastle run on stderr, followed by setup hints chosen
 * from the error message: a missing-image hint, a macOS Keychain hint for
 * login failures (darwin only), and always a pointer to the runbook.
 * Used by both the implementer and reviewer error paths.
 *
 * @param {string} stage label for the failing stage, e.g. "Implementer"
 * @param {{ message?: string }} e the error that was caught
 */
function explainSandcastleError(stage, e) {
  const message = e.message ?? "";
  const hints = [];

  if (/Image '.+' not found locally/.test(message)) {
    hints.push("  One-time setup: pnpm exec sandcastle docker build-image");
  }

  const loginFailure = /Not logged in|Please run \/login/.test(message);
  if (loginFailure && process.platform === "darwin") {
    hints.push(
      "  macOS users: Claude Code stores credentials in the Keychain, not in ~/.claude/. Extract once:",
      '    security find-generic-password -s "Claude Code-credentials" -a "$USER" -w > ~/.claude/.credentials.json',
      "    chmod 600 ~/.claude/.credentials.json",
      "  OR fall back to API key: export ANTHROPIC_API_KEY=sk-ant-...",
    );
  }

  hints.push("  See docs/guides/runbook.md → 'Using Sandcastle' for setup.");

  console.error(`✗ ${stage} dispatch failed:`, e.message);
  for (const hint of hints) console.error(hint);
}

/**
 * Read a positive-integer tuning knob from the environment, falling back to
 * `fallback` (with a warning) when the variable is unset, blank, or not a
 * positive integer.
 */
function readPositiveIntEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  if (Number.isInteger(parsed) && parsed >= 1) return parsed;
  console.warn(
    `⚠ Ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}.`,
  );
  return fallback;
}

/**
 * Run one slice end-to-end: implementer + reviewer, with a fix-up cycle on
 * reject (capped at SANDCASTLE_MAX_ATTEMPTS, default 3). Each slice is an
 * independent sandcastle session — sandcastle's `resumeSession` is
 * incompatible with the multi-iteration budgets a TDD slice requires
 * (applies to iteration 1 only).
 *
 * @param {object} ctx
 * @param {object} ctx.sandcastleRoot the imported sandcastle module (its `run` is called)
 * @param {object} ctx.sandbox sandbox provider instance passed through to `run`
 * @param {object} ctx.agent agent definition passed through to `run`
 * @param {object} ctx.next task descriptor as returned by `findNextTask`
 * @returns {Promise<object>} one of:
 *   { outcome: "approved", attempts, implJson, reviewJson }
 *   { outcome: "rejected-final", attempts, lastRejectNotes }
 *   { outcome: "blocked", attempts, implJson }
 *   { outcome: "error", attempts, reason }
 */
async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
  // Validate the env knobs up front: a bare Number() on a typo yields NaN,
  // which would skip the attempt loop entirely and report the slice as
  // "rejected" after zero attempts.
  const maxAttempts = readPositiveIntEnv("SANDCASTLE_MAX_ATTEMPTS", 3);
  // Implementer runs a full TDD slice (read context, red test, green impl,
  // run all five gates, commit). 30 iterations matches typical slice shape.
  const implementerIterations = readPositiveIntEnv(
    "SANDCASTLE_IMPLEMENTER_ITERATIONS",
    30,
  );
  // Reviewer reads the diff + task spec and decides (approve/reject).
  // Smaller surface than the implementer; 10 iterations is plenty.
  const reviewerIterations = readPositiveIntEnv(
    "SANDCASTLE_REVIEWER_ITERATIONS",
    10,
  );
  const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md");
  const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md");

  let rejectionNotes = null;
  let attempts = 0;

  while (attempts < maxAttempts) {
    attempts++;
    const taskSpec = buildTaskSpec(next, rejectionNotes);

    let implResult;
    try {
      implResult = await sandcastleRoot.run({
        agent,
        sandbox,
        promptFile: implementerPrompt,
        promptArgs: { TASK_FILE_CONTENT: taskSpec },
        cwd: REPO_ROOT,
        maxIterations: implementerIterations,
        // Stop iterating the moment the agent emits this marker. Without
        // it, sandcastle re-invokes the model up to maxIterations even
        // when the work is already done.
        completionSignal: "<promise>COMPLETE</promise>",
      });
    } catch (e) {
      explainSandcastleError("Implementer", e);
      return { outcome: "error", attempts, reason: e.message };
    }
    console.log(
      `Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`,
    );

    const implJson = parseAgentJson(implResult.stdout);
    if (
      implJson?.status === "blocked" ||
      implJson?.status === "needs-clarification"
    ) {
      return { outcome: "blocked", attempts, implJson };
    }

    let diff;
    try {
      // execFileSync passes the revision range as a single argv entry, so
      // the branch name is never interpreted by a shell. The raised
      // maxBuffer keeps large diffs from overflowing the 1 MiB default and
      // silently degrading to the placeholder below.
      diff = execFileSync("git", ["diff", `main..${implResult.branch}`], {
        encoding: "utf8",
        cwd: REPO_ROOT,
        maxBuffer: 64 * 1024 * 1024,
      });
    } catch (e) {
      // Best effort: the reviewer still runs, but say why the diff is absent.
      console.warn(
        `⚠ Could not compute diff for ${implResult.branch}: ${e.message}`,
      );
      diff = "(diff unavailable)";
    }

    let reviewResult;
    try {
      reviewResult = await sandcastleRoot.run({
        agent,
        sandbox,
        promptFile: reviewerPrompt,
        promptArgs: { TASK_FILE_CONTENT: taskSpec, DIFF: diff },
        cwd: REPO_ROOT,
        maxIterations: reviewerIterations,
        // Same early-stop marker as the implementer run.
        completionSignal: "<promise>COMPLETE</promise>",
      });
    } catch (e) {
      explainSandcastleError("Reviewer", e);
      return { outcome: "error", attempts, reason: e.message };
    }
    const reviewJson = parseAgentJson(reviewResult.stdout);

    if (reviewJson?.decision === "approve") {
      return { outcome: "approved", attempts, implJson, reviewJson };
    }
    if (reviewJson?.decision === "reject") {
      // The notes are fed into the next implementer attempt's task spec.
      rejectionNotes =
        reviewJson.notes ?? "(reviewer rejected without notes — re-attempt)";
      console.log(
        `↺ Attempt ${attempts}/${maxAttempts} rejected. Re-dispatching implementer with notes.`,
      );
      continue;
    }
    return {
      outcome: "error",
      attempts,
      reason: `reviewer returned no parseable decision; stdout:\n${reviewResult.stdout}`,
    };
  }
  return { outcome: "rejected-final", attempts, lastRejectNotes: rejectionNotes };
}

/**
 * Bookkeeping after an approved slice, committed separately from the
 * implementer's own commit:
 *   - tick the task bullet in the story file;
 *   - set the story to `done` when no unchecked bullets remain, or move it
 *     from `todo` to `in-progress` on an earlier tick;
 *   - when the story just finished, tick its bullet in the epic and flip the
 *     epic to `done` if every story is done;
 *   - `git add` the touched files and `git commit` them in REPO_ROOT.
 *
 * NOTE(review): `next.bulletIndex` was computed before the implementer ran,
 * while the story file is re-read here — confirm the implementer never
 * shifts lines in the story file.
 *
 * @param {object} next task descriptor as returned by `findNextTask`
 */
function applyApprovedState(next) {
  const { storyPath, bulletIndex, epic, story } = next;

  const ticked = tickBulletInContent(
    fs.readFileSync(storyPath, "utf8"),
    bulletIndex,
  );
  const statusBefore = readFrontmatterStatus(ticked);
  const storyFlipped =
    countUncheckedBullets(ticked) === 0 && statusBefore !== "done";

  let updated = ticked;
  if (storyFlipped) {
    updated = setFrontmatterStatus(ticked, "done");
  } else if (statusBefore === "todo") {
    updated = setFrontmatterStatus(ticked, "in-progress");
  }
  fs.writeFileSync(storyPath, updated);

  // Epic-level updates only happen on the tick that finishes the story.
  // The epic bullet is ticked before the epic status is evaluated.
  const epicBulletTicked =
    storyFlipped && tickStoryBulletInEpic(WORK_ROOT, epic, story);
  const epicFlipped =
    storyFlipped && flipEpicDoneIfAllStoriesDone(WORK_ROOT, epic);

  const filesToStage = [path.relative(REPO_ROOT, storyPath)];
  if (epicFlipped || epicBulletTicked) {
    const epicFile = path.join(WORK_ROOT, "epics", epic, "_epic.md");
    filesToStage.push(path.relative(REPO_ROOT, epicFile));
  }

  // The most significant change names the commit.
  let commitMsg;
  if (epicFlipped) {
    commitMsg = `chore(work): finish epic ${epic}`;
  } else if (storyFlipped) {
    commitMsg = `chore(work): finish ${story}`;
  } else {
    commitMsg = `chore(work): tick task in ${story}`;
  }

  execFileSync("git", ["add", ...filesToStage], { cwd: REPO_ROOT });
  execFileSync("git", ["commit", "-m", commitMsg], {
    cwd: REPO_ROOT,
    stdio: "inherit",
  });
  console.log(`✓ ${commitMsg}`);
}

/**
 * The `--execute` loop: resolve auth, load sandcastle, then repeatedly pick
 * the next ready task, run it through implementer + reviewer, and record the
 * result. The loop ends when no task is ready or `maxTasks` approved slices
 * have been dispatched; any non-approved outcome exits the process with
 * code 1.
 *
 * @param {{ maxTasks: number | null }} options cap on approved slices
 *   (null = unlimited)
 */
async function executeDispatch({ maxTasks }) {
  const auth = resolveClaudeAuth();
  if (auth.mode === "missing") {
    const help = [
      "✗ --execute requires either:",
      "  1. Claude Code logged in on host (run `claude login` first; ~/.claude/ becomes the auth source — this is the recommended path for Pro/Max subscribers)",
      "  2. ANTHROPIC_API_KEY or OPENAI_API_KEY in env (fallback)",
      "",
      "  Override Claude creds path via SANDCASTLE_CLAUDE_CREDS_DIR.",
    ];
    for (const row of help) console.error(row);
    process.exit(1);
  }
  const usingSubscription = auth.mode === "subscription";
  const authLabel = usingSubscription
    ? `subscription (mounting ${auth.hostPath})`
    : "api-key";
  console.log(`Auth mode: ${authLabel}`);

  // Loaded lazily so plan-only mode never needs sandcastle installed.
  let sandcastleRoot;
  let dockerProvider;
  try {
    sandcastleRoot = await import("@ai-hero/sandcastle");
    ({ docker: dockerProvider } = await import(
      "@ai-hero/sandcastle/sandboxes/docker"
    ));
  } catch {
    console.error(
      "✗ @ai-hero/sandcastle is not installed. Run `pnpm install` first.",
    );
    process.exit(1);
  }

  // Subscription auth mounts the host creds directory into the sandbox;
  // API-key auth forwards the key through the agent's environment.
  const dockerOpts = usingSubscription
    ? {
        mounts: [
          {
            hostPath: auth.hostPath,
            sandboxPath: auth.sandboxPath,
            readonly: false,
          },
        ],
      }
    : {};
  const agentOpts = auth.mode === "api-key" ? { env: auth.env } : {};
  const sandbox = dockerProvider(dockerOpts);
  const agent = sandcastleRoot.claudeCode("claude-sonnet-4-6", agentOpts);

  let approved = 0;
  for (;;) {
    if (maxTasks !== null && approved >= maxTasks) {
      console.log(`\nHit --max-tasks=${maxTasks} cap; stopping.`);
      break;
    }
    const next = findNextTask();
    if (!next) {
      console.log("\nNo more ready tasks. Dispatch loop complete.");
      break;
    }
    console.log(
      `\n--- Slice ${approved + 1}: ${next.epic} / ${next.story} ---`,
    );
    console.log(`    Bullet: ${next.bulletLine.trim()}`);

    const result = await runOneSlice({ sandcastleRoot, sandbox, agent, next });
    switch (result.outcome) {
      case "approved":
        applyApprovedState(next);
        approved++;
        continue;
      case "rejected-final":
        console.error(
          `\n✗ Slice rejected after ${result.attempts} attempts. Stopping dispatch loop.`,
        );
        if (result.lastRejectNotes) {
          console.error(`Last rejection notes:\n${result.lastRejectNotes}`);
        }
        process.exit(1);
        break;
      case "blocked":
        console.error(
          `\n✗ Implementer reported ${result.implJson?.status ?? "blocked"}. Stopping dispatch loop.`,
        );
        if (result.implJson?.notes) {
          console.error(`Implementer notes:\n${result.implJson.notes}`);
        }
        process.exit(1);
        break;
      default:
        // outcome === "error" (or anything unrecognised)
        console.error(`\n✗ Slice errored: ${result.reason ?? "(no reason)"}`);
        process.exit(1);
    }
  }
  console.log(`\nDispatched ${approved} slice(s).`);
}

/**
 * Explicit CLI entry. Exported so cli.mjs can dispatch into this module
 * without relying on a top-level side effect (which would also fire when
 * sibling work scripts import `resolveClaudeAuth`, etc.).
 *
 * Flags:
 *   --execute        run sandcastle (default: print plan only)
 *   --once           stop after one approved slice (default: loop until done)
 *   --max-tasks N    stop after N approved slices; N must be a positive
 *                    integer. Takes precedence over --once when both are given.
 *
 * An invalid or missing --max-tasks value prints an error and exits with
 * code 2.
 *
 * @param {string[]} args command-line arguments after the script name
 * @returns {Promise<void>}
 */
export async function runCli(args) {
  if (!args.includes("--execute")) {
    printPlan();
    return;
  }

  let maxTasks = args.includes("--once") ? 1 : null;
  const maxTasksFlagIdx = args.indexOf("--max-tasks");
  if (maxTasksFlagIdx !== -1) {
    const raw = args[maxTasksFlagIdx + 1];
    const parsed = Number(raw);
    // isInteger (not isFinite): the cap is compared against a whole-number
    // counter, so "1.5" would silently behave like 2. It also rejects NaN
    // (missing or non-numeric value) and Infinity.
    if (!Number.isInteger(parsed) || parsed < 1) {
      console.error(`✗ --max-tasks expects a positive integer, got: ${raw}`);
      process.exit(2);
      return;
    }
    maxTasks = parsed;
  }
  await executeDispatch({ maxTasks });
}

// When invoked directly (`node scripts/work/dispatch.mjs ...`), run the CLI.
// When imported by cli.mjs or any sibling, do nothing — the caller decides.
//
// The check compares filesystem paths instead of gluing "file://" onto
// argv[1]: import.meta.url is a percent-encoded URL, so the string form
// never matches for paths containing spaces or other escaped characters,
// nor for Windows paths. argv[1] is also tried through realpath so that
// launching via a symlink still counts as a direct invocation.
const invokedDirectly = (() => {
  const entry = process.argv[1];
  if (!entry) return false;
  const self = fileURLToPath(import.meta.url);
  const candidates = [path.resolve(entry)];
  try {
    candidates.push(fs.realpathSync(entry));
  } catch {
    // entry does not resolve on disk; the plain resolved path is enough
  }
  return candidates.includes(self);
})();
if (invokedDirectly) {
  // Surface failures explicitly instead of leaving the promise floating.
  runCli(process.argv.slice(2)).catch((err) => {
    console.error(err);
    process.exit(1);
  });
}