diff --git a/.env.example b/.env.example
index 2acf48b..9341a9d 100644
--- a/.env.example
+++ b/.env.example
@@ -88,14 +88,3 @@ CMS_URL=http://localhost:3001
# notes printed. Bump for tricky slices; lower for fast-feedback iteration.
#
# SANDCASTLE_MAX_ATTEMPTS=3
-
-# Session-resume token threshold. The orchestrator passes the prior
-# implementer's session ID into the next slice's run() via sandcastle's
-# `resumeSession` — the agent walks into task 2 already knowing where
-# helpers live, what the prior diff looked like, which gates passed.
-# When the prior iteration's total input tokens (input + cacheRead +
-# cacheCreation) crosses this threshold the orchestrator drops the
-# session and starts the next task fresh, avoiding mid-slice context
-# exhaustion. Default 140000 ≈ 70% of Sonnet 4.6's 200k window.
-#
-# SANDCASTLE_SESSION_TOKEN_RESET=140000
diff --git a/scripts/work/dispatch.mjs b/scripts/work/dispatch.mjs
index b10f56b..d5fe647 100644
--- a/scripts/work/dispatch.mjs
+++ b/scripts/work/dispatch.mjs
@@ -323,60 +323,19 @@ function explainSandcastleError(stage, e) {
console.error(" See docs/guides/runbook.md → 'Using Sandcastle' for setup.");
}
-/**
- * Sum of all input-token classes counted against the model's context
- * window for one iteration. `cacheReadInputTokens` and
- * `cacheCreationInputTokens` are cheap dollar-wise but still occupy the
- * window, so they all count for the reset-threshold check.
- */
-function totalInputTokens(usage) {
- if (!usage) return 0;
- return (
- (usage.inputTokens ?? 0) +
- (usage.cacheCreationInputTokens ?? 0) +
- (usage.cacheReadInputTokens ?? 0)
- );
-}
-
-/**
- * Detect Claude / sandcastle errors that indicate the agent's input
- * exceeded the model's context window. The orchestrator handles this by
- * dropping the resumed session and retrying once with a fresh session.
- */
-function isContextExhaustedError(e) {
- const msg = String(e?.message ?? e ?? "");
- return (
- /prompt is too long/i.test(msg) ||
- /context_length_exceeded/i.test(msg) ||
- /context window/i.test(msg) ||
- /too many tokens/i.test(msg)
- );
-}
-
/**
* Run one slice end-to-end: implementer + reviewer, with a fix-up cycle on
- * reject (capped at maxAttempts). The implementer is invoked with
- * `resumeSession` so its prior context (file reads, helper signatures,
- * gate output) carries forward; the reviewer always runs fresh so each
- * approve/reject decision is independent.
+ * reject (capped at maxAttempts). Each slice is an independent sandcastle
+ * session: sandcastle's `resumeSession` applies to iteration 1 only, so it
+ * is incompatible with the multi-iteration budgets a TDD slice requires.
*
- * Returns:
- * { outcome, attempts, sessionId, usage, ... }
- * where sessionId / usage are from the implementer's LAST iteration of
- * its LAST attempt (used by the caller for the next slice's resume +
- * threshold check). Outcome variants:
+ * Outcome variants:
* "approved" (implJson, reviewJson)
* "rejected-final" (lastRejectNotes)
* "blocked" (implJson)
* "error" (reason)
*/
-async function runOneSlice({
- sandcastleRoot,
- sandbox,
- agent,
- next,
- resumeSession,
-}) {
+async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
const maxAttempts = Number(process.env.SANDCASTLE_MAX_ATTEMPTS ?? 3);
const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md");
const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md");
@@ -384,13 +343,6 @@ async function runOneSlice({
let rejectionNotes = null;
let lastRejectNotes = null;
let attempts = 0;
- // Across retries within this slice, the implementer resumes from the
- // most recent session — so attempt N sees attempt N-1's reasoning + the
- // reviewer's rejection notes. Caller's resumeSession (from the prior
- // slice) seeds the first attempt; null = fresh session.
- let currentSession = resumeSession ?? null;
- let lastUsage = null;
- let contextResetUsedThisSlice = false;
while (attempts < maxAttempts) {
attempts++;
@@ -414,55 +366,21 @@ async function runOneSlice({
// it, sandcastle re-invokes the model up to maxIterations even
// when the work is already done.
completionSignal: "COMPLETE",
- // Resume from the prior slice's (or prior attempt's) session so
- // the implementer doesn't re-explore the repo for every task in
- // the same story.
- resumeSession: currentSession ?? undefined,
});
} catch (e) {
- // Context-exhaustion safety net: drop the resumed session and retry
- // the same attempt fresh, exactly once per slice.
- if (
- isContextExhaustedError(e) &&
- currentSession &&
- !contextResetUsedThisSlice
- ) {
- console.log(
- "↺ Context window exhausted; dropping resumed session and retrying fresh.",
- );
- currentSession = null;
- contextResetUsedThisSlice = true;
- attempts--; // not counted against SANDCASTLE_MAX_ATTEMPTS
- continue;
- }
explainSandcastleError("Implementer", e);
- return {
- outcome: "error",
- attempts,
- reason: e.message,
- sessionId: currentSession,
- usage: lastUsage,
- };
+ return { outcome: "error", attempts, reason: e.message };
}
console.log(
`Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`,
);
- const finalIter = implResult.iterations[implResult.iterations.length - 1];
- currentSession = finalIter?.sessionId ?? currentSession;
- lastUsage = finalIter?.usage ?? lastUsage;
const implJson = parseAgentJson(implResult.stdout);
if (
implJson?.status === "blocked" ||
implJson?.status === "needs-clarification"
) {
- return {
- outcome: "blocked",
- attempts,
- implJson,
- sessionId: currentSession,
- usage: lastUsage,
- };
+ return { outcome: "blocked", attempts, implJson };
}
let diff = "";
@@ -489,30 +407,15 @@ async function runOneSlice({
maxIterations: Number(process.env.SANDCASTLE_REVIEWER_ITERATIONS ?? 10),
// See implementer comment above.
completionSignal: "COMPLETE",
- // Reviewer always runs fresh — each approve/reject decision should
- // be independent of prior tasks to keep the gate honest.
});
} catch (e) {
explainSandcastleError("Reviewer", e);
- return {
- outcome: "error",
- attempts,
- reason: e.message,
- sessionId: currentSession,
- usage: lastUsage,
- };
+ return { outcome: "error", attempts, reason: e.message };
}
const reviewJson = parseAgentJson(reviewResult.stdout);
if (reviewJson?.decision === "approve") {
- return {
- outcome: "approved",
- attempts,
- implJson,
- reviewJson,
- sessionId: currentSession,
- usage: lastUsage,
- };
+ return { outcome: "approved", attempts, implJson, reviewJson };
}
if (reviewJson?.decision === "reject") {
rejectionNotes =
@@ -527,17 +430,9 @@ async function runOneSlice({
outcome: "error",
attempts,
reason: `reviewer returned no parseable decision; stdout:\n${reviewResult.stdout}`,
- sessionId: currentSession,
- usage: lastUsage,
};
}
- return {
- outcome: "rejected-final",
- attempts,
- lastRejectNotes,
- sessionId: currentSession,
- usage: lastUsage,
- };
+ return { outcome: "rejected-final", attempts, lastRejectNotes };
}
/**
@@ -641,17 +536,7 @@ async function executeDispatch({ maxTasks }) {
const sandbox = dockerProvider(dockerOpts);
const agent = sandcastleRoot.claudeCode("claude-sonnet-4-6", agentOpts);
- // Session is carried forward across slices WITHIN the same story so the
- // implementer doesn't re-explore the repo each task. Reset on story
- // boundaries (different repo area, different context) and when the prior
- // session's input-token usage crosses the threshold (avoids hitting the
- // model's context-window limit mid-slice).
- const tokenResetThreshold = Number(
- process.env.SANDCASTLE_SESSION_TOKEN_RESET ?? 140000,
- );
let approved = 0;
- let currentStory = null;
- let currentSession = null;
while (true) {
if (maxTasks !== null && approved >= maxTasks) {
console.log(`\nHit --max-tasks=${maxTasks} cap; stopping.`);
@@ -662,40 +547,13 @@ async function executeDispatch({ maxTasks }) {
console.log("\nNo more ready tasks. Dispatch loop complete.");
break;
}
- if (next.story !== currentStory) {
- if (currentStory !== null && currentSession) {
- console.log(
- `\n(Story boundary — resetting implementer session from ${currentStory} → ${next.story})`,
- );
- }
- currentSession = null;
- currentStory = next.story;
- }
console.log(
`\n--- Slice ${approved + 1}: ${next.epic} / ${next.story} ---`,
);
console.log(` Bullet: ${next.bulletLine.trim()}`);
- if (currentSession) {
- console.log(` Resuming session: ${currentSession.slice(0, 12)}…`);
- }
- const result = await runOneSlice({
- sandcastleRoot,
- sandbox,
- agent,
- next,
- resumeSession: currentSession,
- });
+ const result = await runOneSlice({ sandcastleRoot, sandbox, agent, next });
if (result.outcome === "approved") {
- const usedTokens = totalInputTokens(result.usage);
- if (usedTokens > tokenResetThreshold) {
- console.log(
- `(Session at ${usedTokens} input tokens > threshold ${tokenResetThreshold} — resetting before next task)`,
- );
- currentSession = null;
- } else {
- currentSession = result.sessionId ?? null;
- }
applyApprovedState(next);
approved++;
continue;