From 9e7723f9a59bafac183a898c4ceacc6c33a46828 Mon Sep 17 00:00:00 2001
From: Danijel Martinek <danijel@fraqtal.xyz>
Date: Thu, 14 May 2026 11:48:32 +0200
Subject: [PATCH] fix(scripts): remove broken session-resume from dispatch loop

Sandcastle rejects `resumeSession` when `maxIterations > 1` with
"Resume applies to iteration 1 only; multi-iteration resume
semantics are not supported." Since a TDD slice needs the full
30-iteration budget, the session-resume path we shipped in d5c0120
is dead infrastructure that breaks dispatch mid-run.

Rip it out cleanly:
- runOneSlice drops the resumeSession param + the
  context-exhaustion safety net + sessionId/usage return fields
- executeDispatch drops the currentStory/currentSession bookkeeping
  and the token-reset threshold
- helpers totalInputTokens + isContextExhaustedError are removed
  (they were only used by the resume path)
- SANDCASTLE_SESSION_TOKEN_RESET removed from .env.example

Net: -153 lines. Every implementer run is again an independent
sandcastle session, both across slices and across retries within a
slice; token cost per slice goes up (each implementer re-discovers
context) but the multi-iteration TDD shape works.
A different cross-slice context-passing mechanism (e.g. a
story-level context summary injected into each task spec) is left
as future work.
---
 .env.example              |  11 ---
 scripts/work/dispatch.mjs | 164 +++-----------------------------------
 2 files changed, 11 insertions(+), 164 deletions(-)

diff --git a/.env.example b/.env.example
index 2acf48b..9341a9d 100644
--- a/.env.example
+++ b/.env.example
@@ -88,14 +88,3 @@ CMS_URL=http://localhost:3001
 # notes printed. Bump for tricky slices; lower for fast-feedback iteration.
 #
 # SANDCASTLE_MAX_ATTEMPTS=3
-
-# Session-resume token threshold. The orchestrator passes the prior
-# implementer's session ID into the next slice's run() via sandcastle's
-# `resumeSession` — the agent walks into task 2 already knowing where
-# helpers live, what the prior diff looked like, which gates passed.
-# When the prior iteration's total input tokens (input + cacheRead +
-# cacheCreation) crosses this threshold the orchestrator drops the
-# session and starts the next task fresh, avoiding mid-slice context
-# exhaustion. Default 140000 ≈ 70% of Sonnet 4.6's 200k window.
-#
-# SANDCASTLE_SESSION_TOKEN_RESET=140000
diff --git a/scripts/work/dispatch.mjs b/scripts/work/dispatch.mjs
index b10f56b..d5fe647 100644
--- a/scripts/work/dispatch.mjs
+++ b/scripts/work/dispatch.mjs
@@ -323,60 +323,19 @@ function explainSandcastleError(stage, e) {
   console.error("  See docs/guides/runbook.md → 'Using Sandcastle' for setup.");
 }
 
-/**
- * Sum of all input-token classes counted against the model's context
- * window for one iteration. `cacheReadInputTokens` and
- * `cacheCreationInputTokens` are cheap dollar-wise but still occupy the
- * window, so they all count for the reset-threshold check.
- */
-function totalInputTokens(usage) {
-  if (!usage) return 0;
-  return (
-    (usage.inputTokens ?? 0) +
-    (usage.cacheCreationInputTokens ?? 0) +
-    (usage.cacheReadInputTokens ?? 0)
-  );
-}
-
-/**
- * Detect Claude / sandcastle errors that indicate the agent's input
- * exceeded the model's context window. The orchestrator handles this by
- * dropping the resumed session and retrying once with a fresh session.
- */
-function isContextExhaustedError(e) {
-  const msg = String(e?.message ?? e ?? "");
-  return (
-    /prompt is too long/i.test(msg) ||
-    /context_length_exceeded/i.test(msg) ||
-    /context window/i.test(msg) ||
-    /too many tokens/i.test(msg)
-  );
-}
-
 /**
  * Run one slice end-to-end: implementer + reviewer, with a fix-up cycle on
- * reject (capped at maxAttempts). The implementer is invoked with
- * `resumeSession` so its prior context (file reads, helper signatures,
- * gate output) carries forward; the reviewer always runs fresh so each
- * approve/reject decision is independent.
+ * reject (capped at maxAttempts). Each slice is an independent sandcastle
+ * session — sandcastle's `resumeSession` is incompatible with the
+ * multi-iteration budgets a TDD slice requires (applies to iteration 1 only).
  *
- * Returns:
- *   { outcome, attempts, sessionId, usage, ... }
- * where sessionId / usage are from the implementer's LAST iteration of
- * its LAST attempt (used by the caller for the next slice's resume +
- * threshold check). Outcome variants:
+ * Outcome variants:
  *   "approved"       (implJson, reviewJson)
  *   "rejected-final" (lastRejectNotes)
  *   "blocked"        (implJson)
  *   "error"          (reason)
  */
-async function runOneSlice({
-  sandcastleRoot,
-  sandbox,
-  agent,
-  next,
-  resumeSession,
-}) {
+async function runOneSlice({ sandcastleRoot, sandbox, agent, next }) {
   const maxAttempts = Number(process.env.SANDCASTLE_MAX_ATTEMPTS ?? 3);
   const implementerPrompt = path.join(SANDCASTLE_DIR, "implementer.prompt.md");
   const reviewerPrompt = path.join(SANDCASTLE_DIR, "reviewer.prompt.md");
@@ -384,13 +343,6 @@ async function runOneSlice({
   let rejectionNotes = null;
   let lastRejectNotes = null;
   let attempts = 0;
-  // Across retries within this slice, the implementer resumes from the
-  // most recent session — so attempt N sees attempt N-1's reasoning + the
-  // reviewer's rejection notes. Caller's resumeSession (from the prior
-  // slice) seeds the first attempt; null = fresh session.
-  let currentSession = resumeSession ?? null;
-  let lastUsage = null;
-  let contextResetUsedThisSlice = false;
 
   while (attempts < maxAttempts) {
     attempts++;
@@ -414,55 +366,21 @@ async function runOneSlice({
         // it, sandcastle re-invokes the model up to maxIterations even
         // when the work is already done.
         completionSignal: "<promise>COMPLETE</promise>",
-        // Resume from the prior slice's (or prior attempt's) session so
-        // the implementer doesn't re-explore the repo for every task in
-        // the same story.
-        resumeSession: currentSession ?? undefined,
       });
     } catch (e) {
-      // Context-exhaustion safety net: drop the resumed session and retry
-      // the same attempt fresh, exactly once per slice.
-      if (
-        isContextExhaustedError(e) &&
-        currentSession &&
-        !contextResetUsedThisSlice
-      ) {
-        console.log(
-          "↺ Context window exhausted; dropping resumed session and retrying fresh.",
-        );
-        currentSession = null;
-        contextResetUsedThisSlice = true;
-        attempts--; // not counted against SANDCASTLE_MAX_ATTEMPTS
-        continue;
-      }
       explainSandcastleError("Implementer", e);
-      return {
-        outcome: "error",
-        attempts,
-        reason: e.message,
-        sessionId: currentSession,
-        usage: lastUsage,
-      };
+      return { outcome: "error", attempts, reason: e.message };
     }
     console.log(
       `Implementer returned. Branch: ${implResult.branch}, Commits: ${implResult.commits.length}`,
     );
-    const finalIter = implResult.iterations[implResult.iterations.length - 1];
-    currentSession = finalIter?.sessionId ?? currentSession;
-    lastUsage = finalIter?.usage ?? lastUsage;
 
     const implJson = parseAgentJson(implResult.stdout);
     if (
       implJson?.status === "blocked" ||
       implJson?.status === "needs-clarification"
     ) {
-      return {
-        outcome: "blocked",
-        attempts,
-        implJson,
-        sessionId: currentSession,
-        usage: lastUsage,
-      };
+      return { outcome: "blocked", attempts, implJson };
     }
 
     let diff = "";
@@ -489,30 +407,15 @@ async function runOneSlice({
         maxIterations: Number(process.env.SANDCASTLE_REVIEWER_ITERATIONS ?? 10),
         // See implementer comment above.
         completionSignal: "<promise>COMPLETE</promise>",
-        // Reviewer always runs fresh — each approve/reject decision should
-        // be independent of prior tasks to keep the gate honest.
       });
     } catch (e) {
       explainSandcastleError("Reviewer", e);
-      return {
-        outcome: "error",
-        attempts,
-        reason: e.message,
-        sessionId: currentSession,
-        usage: lastUsage,
-      };
+      return { outcome: "error", attempts, reason: e.message };
     }
     const reviewJson = parseAgentJson(reviewResult.stdout);
 
     if (reviewJson?.decision === "approve") {
-      return {
-        outcome: "approved",
-        attempts,
-        implJson,
-        reviewJson,
-        sessionId: currentSession,
-        usage: lastUsage,
-      };
+      return { outcome: "approved", attempts, implJson, reviewJson };
     }
     if (reviewJson?.decision === "reject") {
       rejectionNotes =
@@ -527,17 +430,9 @@ async function runOneSlice({
       outcome: "error",
       attempts,
       reason: `reviewer returned no parseable decision; stdout:\n${reviewResult.stdout}`,
-      sessionId: currentSession,
-      usage: lastUsage,
     };
   }
-  return {
-    outcome: "rejected-final",
-    attempts,
-    lastRejectNotes,
-    sessionId: currentSession,
-    usage: lastUsage,
-  };
+  return { outcome: "rejected-final", attempts, lastRejectNotes };
 }
 
 /**
@@ -641,17 +536,7 @@ async function executeDispatch({ maxTasks }) {
   const sandbox = dockerProvider(dockerOpts);
   const agent = sandcastleRoot.claudeCode("claude-sonnet-4-6", agentOpts);
 
-  // Session is carried forward across slices WITHIN the same story so the
-  // implementer doesn't re-explore the repo each task. Reset on story
-  // boundaries (different repo area, different context) and when the prior
-  // session's input-token usage crosses the threshold (avoids hitting the
-  // model's context-window limit mid-slice).
-  const tokenResetThreshold = Number(
-    process.env.SANDCASTLE_SESSION_TOKEN_RESET ?? 140000,
-  );
   let approved = 0;
-  let currentStory = null;
-  let currentSession = null;
   while (true) {
     if (maxTasks !== null && approved >= maxTasks) {
       console.log(`\nHit --max-tasks=${maxTasks} cap; stopping.`);
@@ -662,40 +547,13 @@ async function executeDispatch({ maxTasks }) {
       console.log("\nNo more ready tasks. Dispatch loop complete.");
       break;
     }
-    if (next.story !== currentStory) {
-      if (currentStory !== null && currentSession) {
-        console.log(
-          `\n(Story boundary — resetting implementer session from ${currentStory} → ${next.story})`,
-        );
-      }
-      currentSession = null;
-      currentStory = next.story;
-    }
     console.log(
       `\n--- Slice ${approved + 1}: ${next.epic} / ${next.story} ---`,
     );
     console.log(`    Bullet: ${next.bulletLine.trim()}`);
-    if (currentSession) {
-      console.log(`    Resuming session: ${currentSession.slice(0, 12)}…`);
-    }
 
-    const result = await runOneSlice({
-      sandcastleRoot,
-      sandbox,
-      agent,
-      next,
-      resumeSession: currentSession,
-    });
+    const result = await runOneSlice({ sandcastleRoot, sandbox, agent, next });
     if (result.outcome === "approved") {
-      const usedTokens = totalInputTokens(result.usage);
-      if (usedTokens > tokenResetThreshold) {
-        console.log(
-          `(Session at ${usedTokens} input tokens > threshold ${tokenResetThreshold} — resetting before next task)`,
-        );
-        currentSession = null;
-      } else {
-        currentSession = result.sessionId ?? null;
-      }
       applyApprovedState(next);
       approved++;
       continue;