diff --git a/.env.example b/.env.example
index 9341a9d..2acf48b 100644
--- a/.env.example
+++ b/.env.example
@@ -88,3 +88,14 @@ CMS_URL=http://localhost:3001
# notes printed. Bump for tricky slices; lower for fast-feedback iteration.
#
# SANDCASTLE_MAX_ATTEMPTS=3
+
+# Session-resume token threshold. The orchestrator passes the prior
+# implementer's session ID into the next slice's run() via sandcastle's
+# `resumeSession`, so the agent starts the next task already knowing where
+# helpers live, what the prior diff looked like, and which gates passed.
+# When the prior iteration's total input tokens (input + cacheRead +
+# cacheCreation) exceed this threshold, the orchestrator drops the
+# session and starts the next task fresh, avoiding mid-slice context
+# exhaustion. Default 140000 ≈ 70% of Sonnet 4.6's 200k window.
+#
+# SANDCASTLE_SESSION_TOKEN_RESET=140000
diff --git a/scripts/work/dispatch.mjs b/scripts/work/dispatch.mjs
index d532d91..b10f56b 100644
--- a/scripts/work/dispatch.mjs
+++ b/scripts/work/dispatch.mjs
@@ -324,14 +324,59 @@ function explainSandcastleError(stage, e) {
}
/**
- * Run one slice end-to-end: implementer + reviewer, with a fix-up cycle on
- * reject (capped at maxAttempts). Returns:
- * { outcome: "approved", attempts, implJson, reviewJson }
- * { outcome: "rejected-final", attempts, lastRejectNotes }
- * { outcome: "blocked", attempts, implJson }
- * { outcome: "error", attempts, reason }
+ * Sum of all input-token classes counted against the model's context
+ * window for one iteration. The cached classes (`cacheReadInputTokens`,
+ * `cacheCreationInputTokens`) are billed at different rates than plain
+ * input but still occupy the window, so all three are summed here.
*/
-async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
+function totalInputTokens(usage) {
+ if (!usage) return 0;
+ return (
+ (usage.inputTokens ?? 0) +
+ (usage.cacheCreationInputTokens ?? 0) +
+ (usage.cacheReadInputTokens ?? 0)
+ );
+}
+
+/**
+ * Detect Claude / sandcastle errors whose message text indicates the
+ * agent's input exceeded the model's context window (case-insensitive
+ * match). The orchestrator drops the resumed session and retries once.
+ */
+function isContextExhaustedError(e) {
+ const msg = String(e?.message ?? e ?? "");
+ return (
+ /prompt is too long/i.test(msg) ||
+ /context_length_exceeded/i.test(msg) ||
+ /context window/i.test(msg) ||
+ /too many tokens/i.test(msg)
+ );
+}
+
+/**
+ * Run one slice end-to-end: implementer + reviewer, with a fix-up cycle on
+ * reject (capped at maxAttempts). The implementer is invoked with
+ * `resumeSession` so its prior context (file reads, helper signatures,
+ * gate output) carries forward; the reviewer always runs fresh so each
+ * approve/reject decision is independent.
+ *
+ * Returns:
+ * { outcome, attempts, sessionId, usage, ... }
+ * where sessionId / usage come from the final iteration of the most
+ * recent implementer run that returned, or null if none did (used by
+ * the caller for resume + threshold check). Outcome variants:
+ * "approved" (implJson, reviewJson)
+ * "rejected-final" (lastRejectNotes)
+ * "blocked" (implJson)
+ * "error" (reason)
+ */
+async function runOneSlice({
+ sandcastleRoot,
+ sandbox,
+ agent,
+ next,
+ resumeSession,
+}) {
const maxAttempts = Number(process.env.SANDCASTLE_MAX_ATTEMPTS ?? 3);
const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md");
const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md");
@@ -339,6 +384,14 @@ async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
let rejectionNotes = null;
let lastRejectNotes = null;
let attempts = 0;
+ // Across retries within this slice, the implementer resumes from the
+ // most recent session — so attempt N sees attempt N-1's reasoning + the
+ // reviewer's rejection notes. Caller's resumeSession (from the prior
+ // slice) seeds the first attempt; null = fresh session.
+ let currentSession = resumeSession ?? null;
+ let lastUsage = null;
+ let contextResetUsedThisSlice = false;
+
while (attempts < maxAttempts) {
attempts++;
const taskSpec = buildTaskSpec(next, rejectionNotes);
@@ -361,20 +414,55 @@ async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
// it, sandcastle re-invokes the model up to maxIterations even
// when the work is already done.
completionSignal: "COMPLETE",
+ // Resume from the prior slice's (or prior attempt's) session so
+ // the implementer doesn't re-explore the repo for every task in
+ // the same story.
+ resumeSession: currentSession ?? undefined,
});
} catch (e) {
+ // Context-exhaustion safety net: drop the resumed session and retry
+ // the same attempt fresh, exactly once per slice.
+ if (
+ isContextExhaustedError(e) &&
+ currentSession &&
+ !contextResetUsedThisSlice
+ ) {
+ console.log(
+ "↺ Context window exhausted; dropping resumed session and retrying fresh.",
+ );
+ currentSession = null;
+ contextResetUsedThisSlice = true;
+ attempts--; // not counted against SANDCASTLE_MAX_ATTEMPTS
+ continue;
+ }
explainSandcastleError("Implementer", e);
- return { outcome: "error", attempts, reason: e.message };
+ return {
+ outcome: "error",
+ attempts,
+ reason: e.message,
+ sessionId: currentSession,
+ usage: lastUsage,
+ };
}
console.log(
`Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`,
);
+ const finalIter = implResult.iterations[implResult.iterations.length - 1];
+ currentSession = finalIter?.sessionId ?? currentSession;
+ lastUsage = finalIter?.usage ?? lastUsage;
+
const implJson = parseAgentJson(implResult.stdout);
if (
implJson?.status === "blocked" ||
implJson?.status === "needs-clarification"
) {
- return { outcome: "blocked", attempts, implJson };
+ return {
+ outcome: "blocked",
+ attempts,
+ implJson,
+ sessionId: currentSession,
+ usage: lastUsage,
+ };
}
let diff = "";
@@ -401,15 +489,30 @@ async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
maxIterations: Number(process.env.SANDCASTLE_REVIEWER_ITERATIONS ?? 10),
// See implementer comment above.
completionSignal: "COMPLETE",
+ // Reviewer always runs fresh — each approve/reject decision should
+ // be independent of prior tasks to keep the gate honest.
});
} catch (e) {
explainSandcastleError("Reviewer", e);
- return { outcome: "error", attempts, reason: e.message };
+ return {
+ outcome: "error",
+ attempts,
+ reason: e.message,
+ sessionId: currentSession,
+ usage: lastUsage,
+ };
}
const reviewJson = parseAgentJson(reviewResult.stdout);
if (reviewJson?.decision === "approve") {
- return { outcome: "approved", attempts, implJson, reviewJson };
+ return {
+ outcome: "approved",
+ attempts,
+ implJson,
+ reviewJson,
+ sessionId: currentSession,
+ usage: lastUsage,
+ };
}
if (reviewJson?.decision === "reject") {
rejectionNotes =
@@ -420,14 +523,21 @@ async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
);
continue;
}
- // Reviewer produced unparseable / non-decision output — treat as error.
return {
outcome: "error",
attempts,
reason: `reviewer returned no parseable decision; stdout:\n${reviewResult.stdout}`,
+ sessionId: currentSession,
+ usage: lastUsage,
};
}
- return { outcome: "rejected-final", attempts, lastRejectNotes };
+ return {
+ outcome: "rejected-final",
+ attempts,
+ lastRejectNotes,
+ sessionId: currentSession,
+ usage: lastUsage,
+ };
}
/**
@@ -531,7 +641,17 @@ async function executeDispatch({ maxTasks }) {
const sandbox = dockerProvider(dockerOpts);
const agent = sandcastleRoot.claudeCode("claude-sonnet-4-6", agentOpts);
+ // Session is carried forward across slices WITHIN the same story so the
+ // implementer doesn't re-explore the repo each task. Reset on story
+ // boundaries (different repo area, different context) and when the prior
+ // session's input-token usage crosses the threshold (avoids hitting the
+ // model's context-window limit mid-slice).
+ const tokenResetThreshold = Number(
+ process.env.SANDCASTLE_SESSION_TOKEN_RESET ?? 140000,
+ );
let approved = 0;
+ let currentStory = null;
+ let currentSession = null;
while (true) {
if (maxTasks !== null && approved >= maxTasks) {
console.log(`\nHit --max-tasks=${maxTasks} cap; stopping.`);
@@ -542,13 +662,40 @@ async function executeDispatch({ maxTasks }) {
console.log("\nNo more ready tasks. Dispatch loop complete.");
break;
}
+ if (next.story !== currentStory) {
+ if (currentStory !== null && currentSession) {
+ console.log(
+ `\n(Story boundary — resetting implementer session from ${currentStory} → ${next.story})`,
+ );
+ }
+ currentSession = null;
+ currentStory = next.story;
+ }
console.log(
`\n--- Slice ${approved + 1}: ${next.epic} / ${next.story} ---`,
);
console.log(` Bullet: ${next.bulletLine.trim()}`);
+ if (currentSession) {
+ console.log(` Resuming session: ${currentSession.slice(0, 12)}…`);
+ }
- const result = await runOneSlice({ sandcastleRoot, sandbox, agent, next });
+ const result = await runOneSlice({
+ sandcastleRoot,
+ sandbox,
+ agent,
+ next,
+ resumeSession: currentSession,
+ });
if (result.outcome === "approved") {
+ const usedTokens = totalInputTokens(result.usage);
+ if (usedTokens > tokenResetThreshold) {
+ console.log(
+ `(Session at ${usedTokens} input tokens > threshold ${tokenResetThreshold} — resetting before next task)`,
+ );
+ currentSession = null;
+ } else {
+ currentSession = result.sessionId ?? null;
+ }
applyApprovedState(next);
approved++;
continue;