import { describe, expect, test, beforeEach, afterEach, mock } from "bun:test"
import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
import { join } from "node:path"
import { tmpdir } from "node:os"
import { randomUUID } from "node:crypto"
import { createOpencodeClient } from "@opencode-ai/sdk"
import {
  writeBoulderState,
  clearBoulderState,
  readBoulderState,
} from "../../features/boulder-state"
import type { BoulderState } from "../../features/boulder-state"
import { _resetForTesting, registerAgentName, subagentSessions, updateSessionAgent } from "../../features/claude-code-session-state"
import { DEFAULT_PROMPT_DISPATCH_TIMEOUT_MS } from "../../shared/prompt-async-gate"
import type { AtlasHookOptions, PendingTaskRef } from "./types"
import { createAtlasHook } from "./index"
import { createToolExecuteAfterHandler } from "./tool-execute-after"
import { createToolExecuteBeforeHandler } from "./tool-execute-before"

const callerAgentBySession = new Map<string, string>()
type MockAtlasInput = Parameters<typeof createAtlasHook>[0] & {
  _promptMock: ReturnType<typeof mock>
  _sessionGetMock: ReturnType<typeof mock>
}

describe("atlas hook", () => {
  let TEST_DIR: string
  let OMO_DIR: string

  function createMockPluginInput(overrides?: {
    promptMock?: ReturnType<typeof mock>
    sessionGetMock?: ReturnType<typeof mock>
  }): MockAtlasInput {
    const promptMock = overrides?.promptMock ?? mock(() => Promise.resolve())
    const sessionGetMock = overrides?.sessionGetMock ?? mock(async ({ path }: { path: { id: string } }) => ({
      data: {
        id: path.id,
        parentID: path.id.startsWith("ses_") ? "session-1" : "main-session-123",
      },
    }))
    const client = createOpencodeClient({ baseUrl: "http://localhost" })
    Reflect.set(client.session, "get", sessionGetMock)
    Reflect.set(client.session, "prompt", promptMock)
    Reflect.set(client.session, "promptAsync", promptMock)

    return {
      directory: TEST_DIR,
      project: {} as Parameters<typeof createAtlasHook>[0]["project"],
      worktree: TEST_DIR,
      serverUrl: new URL("http://localhost"),
      $: {} as Parameters<typeof createAtlasHook>[0]["$"],
      client,
      _promptMock: promptMock,
      _sessionGetMock: sessionGetMock,
    }
  }

  function setupMessageStorage(sessionID: string, agent: string): void {
    callerAgentBySession.set(sessionID, agent)
  }

  function cleanupMessageStorage(sessionID: string): void {
    callerAgentBySession.delete(sessionID)
  }

  function createTestAtlasHook(
    input = createMockPluginInput(),
    options: Partial<AtlasHookOptions> = {},
  ): ReturnType<typeof createAtlasHook> {
    const resolvedOptions: AtlasHookOptions = {
      directory: TEST_DIR,
      idleSettleMs: 0,
      isCallerOrchestrator: async (sessionID) => callerAgentBySession.get(sessionID ?? "") === "atlas",
      ...options,
    }
    return createAtlasHook(input, resolvedOptions)
  }

  beforeEach(() => {
    _resetForTesting()
    registerAgentName("atlas")
    registerAgentName("sisyphus")
    TEST_DIR = join(tmpdir(), `atlas-test-${randomUUID()}`)
    OMO_DIR = join(TEST_DIR, ".omo")
    if (!existsSync(TEST_DIR)) {
      mkdirSync(TEST_DIR, { recursive: true })
    }
    if (!existsSync(OMO_DIR)) {
      mkdirSync(OMO_DIR, { recursive: true })
    }
    clearBoulderState(TEST_DIR)
    callerAgentBySession.clear()
  })

  afterEach(() => {
    _resetForTesting()
    callerAgentBySession.clear()
    clearBoulderState(TEST_DIR)
    if (existsSync(TEST_DIR)) {
      rmSync(TEST_DIR, { recursive: true, force: true })
    }
  })

  describe("tool.execute.after handler", () => {
    test("should handle undefined output gracefully (issue #1035)", async () => {
      // given - hook and undefined output (e.g., from /review command)
      const hook = createTestAtlasHook(createMockPluginInput())

      // when - calling with undefined output
      const result = await hook["tool.execute.after"](
        { tool: "task", sessionID: "session-123" },
        undefined
      )

      // then - returns undefined without throwing
      expect(result).toBeUndefined()
    })

    test("should ignore non-task tools", async () => {
      // given - hook and non-task tool
      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Test Tool",
        output: "Original output",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "other_tool", sessionID: "session-123" },
        output
      )

      // then - output unchanged
      expect(output.output).toBe("Original output")
    })

     test("should not transform when caller is not Atlas", async () => {
       // given - boulder state exists but caller agent in message storage is not Atlas
       const sessionID = "session-non-orchestrator-test"
       setupMessageStorage(sessionID, "other-agent")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task completed successfully",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - output unchanged because caller is not orchestrator
      expect(output.output).toBe("Task completed successfully")
      
      cleanupMessageStorage(sessionID)
    })

     test("should append standalone verification when no boulder state but caller is Atlas", async () => {
       // given - no boulder state, but caller is Atlas
       const sessionID = "session-no-boulder-test"
       setupMessageStorage(sessionID, "atlas")
      
      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task completed successfully",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - standalone verification reminder appended
      expect(output.output).toContain("Task completed successfully")
      expect(output.output).toContain("LYING")
      expect(output.output).toContain("PHASE 1")
      
      cleanupMessageStorage(sessionID)
    })

     test("should transform output when caller is Atlas with boulder state", async () => {
       // given - Atlas caller with boulder state
       const sessionID = "session-transform-test"
       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task completed successfully",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - output should be transformed (original output preserved for debugging)
      expect(output.output).toContain("Task completed successfully")
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.output).toContain("test-plan")
      expect(output.output).toContain("LYING")
      expect(output.output).toContain("PHASE 1")
      
      cleanupMessageStorage(sessionID)
    })

    test("should preserve metadata when transforming output for boulder orchestrator", async () => {
      // given - Atlas caller with boulder state and metadata containing sessionId
      const sessionID = "session-metadata-preserve-test"
      setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "metadata-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "metadata-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: `Task completed

<task_metadata>
session_id: ses_subagent_abc
</task_metadata>`,
        metadata: {
          sessionId: "ses_subagent_abc",
          agent: "sisyphus-junior",
          category: "quick",
          truncated: false,
        } as Record<string, unknown>,
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - output is transformed but metadata is preserved
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.metadata.sessionId).toBe("ses_subagent_abc")
      expect(output.metadata.agent).toBe("sisyphus-junior")
      expect(output.metadata.category).toBe("quick")
      expect(output.metadata.truncated).toBe(false)

      cleanupMessageStorage(sessionID)
    })

    test("should preserve metadata when appending standalone verification reminder", async () => {
      // given - Atlas caller without boulder state, metadata containing sessionId
      const sessionID = "session-standalone-metadata-test"
      setupMessageStorage(sessionID, "atlas")

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: `Task completed

<task_metadata>
session_id: ses_standalone_def
</task_metadata>`,
        metadata: {
          sessionId: "ses_standalone_def",
          agent: "sisyphus-junior",
          model: { providerID: "openai", modelID: "gpt-5.4" },
          truncated: false,
        } as Record<string, unknown>,
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - standalone verification appended but metadata preserved
      expect(output.output).toContain("LYING")
      expect(output.metadata.sessionId).toBe("ses_standalone_def")
      expect(output.metadata.agent).toBe("sisyphus-junior")
      expect(output.metadata.model).toEqual({ providerID: "openai", modelID: "gpt-5.4" })
      expect(output.metadata.truncated).toBe(false)

      cleanupMessageStorage(sessionID)
    })

     test("should still transform when plan is complete (shows progress)", async () => {
       // given - boulder state with complete plan, Atlas caller
       const sessionID = "session-complete-plan-test"
       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "complete-plan.md")
      writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "complete-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Original output",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - output transformed even when complete (shows 2/2 done)
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.output).toContain("2/2 done")
      expect(output.output).toContain("0 remaining")
      
      cleanupMessageStorage(sessionID)
    })

     test("should not append unrelated current session to boulder state if not already tracked", async () => {
       // given - boulder state without session-append-test, Atlas caller
       const sessionID = "session-append-test"
       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task output",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - unrelated current session should not be absorbed into boulder
      const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.session_ids).not.toContain(sessionID)
      
      cleanupMessageStorage(sessionID)
    })

     test("should not append current session when session lookup fails during append decision", async () => {
       // given - boulder state without session-get-failure-test, Atlas caller, and session lookup failure
       const sessionID = "session-get-failure-test"
       setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput({
        sessionGetMock: mock(async () => {
          throw new Error("session lookup failed")
        }),
      }))
      const output = {
        title: "Sisyphus Task",
        output: "Task output",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output,
      )

      // then
      const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.session_ids).not.toContain(sessionID)

      cleanupMessageStorage(sessionID)
    })

     test("should not duplicate existing session ID", async () => {
       // given - boulder state already has session-dup-test, Atlas caller
       const sessionID = "session-dup-test"
       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [sessionID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task output",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - should still have only one sessionID
      const updatedState = readBoulderState(TEST_DIR)
      const count = updatedState?.session_ids.filter((id) => id === sessionID).length
      expect(count).toBe(1)
      
      cleanupMessageStorage(sessionID)
    })

     test("should include boulder.json path and notepad path in transformed output", async () => {
       // given - boulder state, Atlas caller
       const sessionID = "session-path-test"
       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "my-feature.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "my-feature",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task completed",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - output should contain plan name and progress
      expect(output.output).toContain("my-feature")
      expect(output.output).toContain("1/3 done")
      expect(output.output).toContain("2 remaining")
      
      cleanupMessageStorage(sessionID)
    })

     test("should include session_id and checkbox instructions in reminder", async () => {
       // given - boulder state, Atlas caller
       const sessionID = "session-resume-test"
       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: "Task completed",
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then - should include verification instructions
      expect(output.output).toContain("LYING")
     expect(output.output).toContain("PHASE 1")
     expect(output.output).toContain("PHASE 2")
      
      cleanupMessageStorage(sessionID)
    })

    test("should clean pending task refs when a task returns background launch output", async () => {
      // given - direct handlers with shared pending maps
      const sessionID = "session-bg-launch-cleanup-test"
      setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "background-cleanup-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [ ] 1. Implement auth flow
`)
      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "background-cleanup-plan",
      })

      const pendingFilePaths = new Map<string, string>()
      const pendingTaskRefs = new Map<string, PendingTaskRef>()
      const beforeHandler = createToolExecuteBeforeHandler({
        ctx: createMockPluginInput(),
        pendingFilePaths,
        pendingTaskRefs,
        isCallerOrchestrator: async (id) => callerAgentBySession.get(id ?? "") === "atlas",
      })
      const afterHandler = createToolExecuteAfterHandler({
        ctx: createMockPluginInput(),
        pendingFilePaths,
        pendingTaskRefs,
        autoCommit: true,
        getState: () => ({ promptFailureCount: 0 }),
        isCallerOrchestrator: async (id) => callerAgentBySession.get(id ?? "") === "atlas",
      })

      // when - the task is captured before execution
      await beforeHandler(
        { tool: "task", sessionID, callID: "call-bg-launch" },
        { args: { prompt: "Implement auth flow" } }
      )
      expect(pendingTaskRefs.size).toBe(1)

      // and the task returns a background launch result
      await afterHandler(
        { tool: "task", sessionID, callID: "call-bg-launch" },
        {
          title: "Sisyphus Task",
          output: "Background task launched.\n\nSession ID: ses_bg_12345",
          metadata: {},
        }
      )

      // then - the pending task ref is still cleaned up
      expect(pendingTaskRefs.size).toBe(0)

      cleanupMessageStorage(sessionID)
    })

     test("should persist preferred subagent session for the current top-level task", async () => {
       // given - boulder state with a current top-level task, Atlas caller
       const sessionID = "session-task-session-track-test"
       setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "task-session-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [ ] 1. Implement auth flow
  - [ ] nested acceptance checkbox
`)

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "task-session-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: `Task completed successfully

<task_metadata>
session_id: ses_auth_flow_123
</task_metadata>`,
        metadata: {
          agent: "sisyphus-junior",
          category: "deep",
        },
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then
     const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
      expect(updatedState?.task_sessions?.["todo:1"]?.task_title).toBe("Implement auth flow")
      expect(updatedState?.task_sessions?.["todo:1"]?.agent).toBe("sisyphus-junior")
      expect(updatedState?.task_sessions?.["todo:1"]?.category).toBe("deep")

      cleanupMessageStorage(sessionID)
    })

     test("should preserve the delegated task key even after the plan advances to the next task", async () => {
       // given - Atlas caller starts task 1, then the plan advances before task output is processed
       const sessionID = "session-stable-task-key-test"
       setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "stable-task-key-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [ ] 1. Implement auth flow
- [ ] 2. Add API validation
`)

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "stable-task-key-plan",
      })

      const hook = createTestAtlasHook(createMockPluginInput())

      // when - Atlas delegates task 1
      await hook["tool.execute.before"](
        { tool: "task", sessionID, callID: "call-task-1" },
        { args: { prompt: "Implement auth flow" } }
      )

      // and the plan is advanced before the task output is processed
      writeFileSync(planPath, `# Plan

## TODOs
- [x] 1. Implement auth flow
- [ ] 2. Add API validation
`)

      await hook["tool.execute.after"](
        { tool: "task", sessionID, callID: "call-task-1" },
        {
          title: "Sisyphus Task",
          output: `Task completed successfully

<task_metadata>
session_id: ses_auth_flow_123
</task_metadata>`,
          metadata: {
            agent: "sisyphus-junior",
            category: "deep",
          },
        }
      )

      // then - the completed task session is still recorded against task 1, not task 2
     const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.task_sessions?.["todo:1"]?.session_id).toBe("ses_auth_flow_123")
      expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()

      cleanupMessageStorage(sessionID)
    })

     test("should not overwrite the current task mapping when task() explicitly resumes an older session", async () => {
       // given - current plan is on task 2, but Atlas explicitly resumes an older session for a previous task
       const sessionID = "session-cross-task-resume-test"
       setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "cross-task-resume-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [x] 1. Implement auth flow
- [ ] 2. Add API validation
`)

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "cross-task-resume-plan",
      })

      const hook = createTestAtlasHook(createMockPluginInput())

      // when - Atlas resumes an explicit prior session
      await hook["tool.execute.before"](
        { tool: "task", sessionID, callID: "call-resume-old-task" },
        { args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
      )

      const output = {
        title: "Sisyphus Task",
        output: `Task continued successfully

<task_metadata>
session_id: ses_old_task_111
</task_metadata>`,
        metadata: {
          agent: "sisyphus-junior",
          category: "deep",
        },
      }
      await hook["tool.execute.after"](
        { tool: "task", sessionID, callID: "call-resume-old-task" },
        output
      )

      // then - Atlas does not poison task 2's preferred session mapping
      const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.task_sessions?.["todo:2"]).toBeUndefined()
      expect(output.output).not.toContain('task(session_id="ses_old_task_111"')

      cleanupMessageStorage(sessionID)
    })

    test("should not reuse an explicitly resumed session id in completion reminders", async () => {
      // given - current plan is on task 2 with an existing tracked session
      const sessionID = "session-explicit-resume-reminder-test"
      setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "explicit-resume-reminder-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [x] 1. Implement auth flow
- [ ] 2. Add API validation
`)

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "explicit-resume-reminder-plan",
        task_sessions: {
          "todo:2": {
            task_key: "todo:2",
            task_label: "2",
            task_title: "Add API validation",
            session_id: "ses_tracked_current_task",
            updated_at: "2026-01-02T10:00:00Z",
          },
        },
      })

      const hook = createTestAtlasHook(createMockPluginInput())
      const output = {
        title: "Sisyphus Task",
        output: `Task continued successfully

<task_metadata>
session_id: ses_old_task_111
</task_metadata>`,
        metadata: {},
      }

      // when
      await hook["tool.execute.before"](
        { tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
        { args: { prompt: "Follow up on previous task", session_id: "ses_old_task_111" } }
      )
      await hook["tool.execute.after"](
        { tool: "task", sessionID, callID: "call-explicit-resume-reminder" },
        output
      )

      // then
      expect(output.output).not.toContain('task(session_id="ses_old_task_111"')
      expect(output.output).toContain("ses_tracked_current_task")

      cleanupMessageStorage(sessionID)
    })

    test("should skip persistence when multiple in-flight task calls claim the same top-level task", async () => {
      // given
      const sessionID = "session-parallel-task-collision-test"
      setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "parallel-task-collision-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [ ] 1. Implement auth flow
- [ ] 2. Add API validation
`)

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "parallel-task-collision-plan",
      })

      const pendingFilePaths = new Map<string, string>()
      const pendingTaskRefs = new Map<string, PendingTaskRef>()
      const beforeHandler = createToolExecuteBeforeHandler({
        ctx: createMockPluginInput(),
        pendingFilePaths,
        pendingTaskRefs,
        isCallerOrchestrator: async (id) => callerAgentBySession.get(id ?? "") === "atlas",
      })
      const afterHandler = createToolExecuteAfterHandler({
        ctx: createMockPluginInput(),
        pendingFilePaths,
        pendingTaskRefs,
        autoCommit: true,
        getState: () => ({ promptFailureCount: 0 }),
        isCallerOrchestrator: async (id) => callerAgentBySession.get(id ?? "") === "atlas",
      })

      // when - two task() calls start before either one completes
      await beforeHandler(
        { tool: "task", sessionID, callID: "call-task-first" },
        { args: { prompt: "Implement auth flow part 1" } }
      )
      await beforeHandler(
        { tool: "task", sessionID, callID: "call-task-second" },
        { args: { prompt: "Implement auth flow part 2" } }
      )

      const secondPendingTaskRef = pendingTaskRefs.get("call-task-second")

      await afterHandler(
        { tool: "task", sessionID, callID: "call-task-second" },
        {
          title: "Sisyphus Task",
          output: `Task completed successfully

<task_metadata>
session_id: ses_parallel_collision_222
</task_metadata>`,
          metadata: {},
        }
      )

      // then
      expect(secondPendingTaskRef).toEqual({
        kind: "skip",
        reason: "ambiguous_task_key",
        task: {
          key: "todo:1",
          label: "1",
          title: "Implement auth flow",
        },
      })
      const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()

      cleanupMessageStorage(sessionID)
    })

    test("should ignore extracted session ids that are outside the active boulder lineage", async () => {
      // given
      const sessionID = "session-untrusted-session-id-test"
      setupMessageStorage(sessionID, "atlas")

      const planPath = join(TEST_DIR, "untrusted-session-id-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [ ] 1. Implement auth flow
`)

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["session-1"],
        plan_name: "untrusted-session-id-plan",
      })

      const hook = createTestAtlasHook(createMockPluginInput({
        sessionGetMock: mock(async ({ path }: { path: { id: string } }) => ({
          data: {
            id: path.id,
            parentID: path.id === "ses_untrusted_999" ? "session-outside-lineage" : "main-session-123",
          },
        })),
      }))
      const output = {
        title: "Sisyphus Task",
        output: `Task completed successfully

<task_metadata>
session_id: ses_untrusted_999
</task_metadata>`,
        metadata: {},
      }

      // when
      await hook["tool.execute.after"](
        { tool: "task", sessionID },
        output
      )

      // then
      const updatedState = readBoulderState(TEST_DIR)
      expect(updatedState?.task_sessions?.["todo:1"]).toBeUndefined()
      expect(output.output).not.toContain('task(session_id="ses_untrusted_999"')
      expect(output.output).not.toContain('task(task_id="ses_untrusted_999"')
      expect(output.output).toContain('task(task_id="<session_id>"')

      cleanupMessageStorage(sessionID)
    })

    describe("completion gate output ordering", () => {
      const COMPLETION_GATE_SESSION = "completion-gate-order-test"

      beforeEach(() => {
        setupMessageStorage(COMPLETION_GATE_SESSION, "atlas")
      })

      afterEach(() => {
        cleanupMessageStorage(COMPLETION_GATE_SESSION)
      })

      test("should include completion gate before Subagent Response in transformed boulder output", async () => {
        // given - Atlas caller with boulder state
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: ["session-1"],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const hook = createTestAtlasHook(createMockPluginInput())
        const output = {
          title: "Sisyphus Task",
          output: "Task completed successfully",
          metadata: {},
        }

        // when
        await hook["tool.execute.after"](
          { tool: "task", sessionID: COMPLETION_GATE_SESSION },
          output
        )

        // then - completion gate should appear BEFORE Subagent Response
        const subagentResponseIndex = output.output.indexOf("**Subagent Response:**")
        const completionGateIndex = output.output.indexOf("COMPLETION GATE")

        expect(completionGateIndex).toBeGreaterThanOrEqual(0)
        expect(subagentResponseIndex).toBeGreaterThanOrEqual(0)
        expect(completionGateIndex).toBeLessThan(subagentResponseIndex)
      })

      test("should include completion gate before verification phase text", async () => {
        // given - Atlas caller with boulder state
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: ["session-1"],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const hook = createTestAtlasHook(createMockPluginInput())
        const output = {
          title: "Sisyphus Task",
          output: "Task completed successfully",
          metadata: {},
        }

        // when
        await hook["tool.execute.after"](
          { tool: "task", sessionID: COMPLETION_GATE_SESSION },
          output
        )

        // then - completion gate should appear BEFORE verification phase text
        const completionGateIndex = output.output.indexOf("COMPLETION GATE")
        const lyingIndex = output.output.indexOf("LYING")
        const phase1Index = output.output.indexOf("PHASE 1")

        expect(completionGateIndex).toBeGreaterThanOrEqual(0)
        expect(lyingIndex).toBeGreaterThanOrEqual(0)
        expect(completionGateIndex).toBeLessThan(lyingIndex)
        if (phase1Index !== -1) {
          expect(completionGateIndex).toBeLessThan(phase1Index)
        }
      })

      test("should not contain old STEP 7 MARK COMPLETION IN PLAN FILE text", async () => {
        // given - Atlas caller with boulder state
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: ["session-1"],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const hook = createTestAtlasHook(createMockPluginInput())
        const output = {
          title: "Sisyphus Task",
          output: "Task completed successfully",
          metadata: {},
        }

        // when
        await hook["tool.execute.after"](
          { tool: "task", sessionID: COMPLETION_GATE_SESSION },
          output
        )

        // then - old STEP 7 MARK COMPLETION IN PLAN FILE should be absent
        expect(output.output).not.toContain("STEP 7: MARK COMPLETION IN PLAN FILE")
        expect(output.output).not.toContain("MARK COMPLETION IN PLAN FILE")
      })
    })

    describe("Write/Edit tool direct work reminder", () => {
      const ORCHESTRATOR_SESSION = "orchestrator-write-test"

       beforeEach(() => {
         setupMessageStorage(ORCHESTRATOR_SESSION, "atlas")
       })

      afterEach(() => {
        cleanupMessageStorage(ORCHESTRATOR_SESSION)
      })

      test("should append delegation reminder when orchestrator writes outside .omo/", async () => {
        // given
        const hook = createTestAtlasHook(createMockPluginInput())
        const output = {
          title: "Write",
          output: "File written successfully",
          metadata: { filePath: "/path/to/code.ts" },
        }

        // when
        await hook["tool.execute.after"](
          { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
          output
        )

        // then
        expect(output.output).toContain("DELEGATION REQUIRED")
        expect(output.output).toContain("task")
        expect(output.output).toContain("task")
      })

      test("should append delegation reminder when orchestrator edits outside .omo/", async () => {
        // given
        const hook = createTestAtlasHook(createMockPluginInput())
        const output = {
          title: "Edit",
          output: "File edited successfully",
          metadata: { filePath: "/src/components/button.tsx" },
        }

        // when
        await hook["tool.execute.after"](
          { tool: "Edit", sessionID: ORCHESTRATOR_SESSION },
          output
        )

        // then
        expect(output.output).toContain("DELEGATION REQUIRED")
      })

      test("should NOT append reminder when orchestrator writes inside .omo/", async () => {
        // given
        const hook = createTestAtlasHook(createMockPluginInput())
        const originalOutput = "File written successfully"
        const output = {
          title: "Write",
          output: originalOutput,
          metadata: { filePath: "/project/.omo/plans/work-plan.md" },
        }

        // when
        await hook["tool.execute.after"](
          { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
          output
        )

        // then
        expect(output.output).toBe(originalOutput)
        expect(output.output).not.toContain("DELEGATION REQUIRED")
      })

      test("should NOT append reminder when non-orchestrator writes outside .omo/", async () => {
        // given
        const nonOrchestratorSession = "non-orchestrator-session"
        setupMessageStorage(nonOrchestratorSession, "sisyphus-junior")
        
        const hook = createTestAtlasHook(createMockPluginInput())
        const originalOutput = "File written successfully"
        const output = {
          title: "Write",
          output: originalOutput,
          metadata: { filePath: "/path/to/code.ts" },
        }

        // when
        await hook["tool.execute.after"](
          { tool: "Write", sessionID: nonOrchestratorSession },
          output
        )

        // then
        expect(output.output).toBe(originalOutput)
        expect(output.output).not.toContain("DELEGATION REQUIRED")
        
        cleanupMessageStorage(nonOrchestratorSession)
      })

      test("should NOT append reminder for read-only tools", async () => {
        // given
        const hook = createTestAtlasHook(createMockPluginInput())
        const originalOutput = "File content"
        const output = {
          title: "Read",
          output: originalOutput,
          metadata: { filePath: "/path/to/code.ts" },
        }

        // when
        await hook["tool.execute.after"](
          { tool: "Read", sessionID: ORCHESTRATOR_SESSION },
          output
        )

        // then
        expect(output.output).toBe(originalOutput)
      })

      test("should handle missing filePath gracefully", async () => {
        // given
        const hook = createTestAtlasHook(createMockPluginInput())
        const originalOutput = "File written successfully"
        const output = {
          title: "Write",
          output: originalOutput,
          metadata: {},
        }

        // when
        await hook["tool.execute.after"](
          { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
          output
        )

        // then
        expect(output.output).toBe(originalOutput)
      })

      describe("cross-platform path validation (Windows support)", () => {
        test("should NOT append reminder when orchestrator writes inside .omo\\ (Windows backslash)", async () => {
          // given
          const hook = createTestAtlasHook(createMockPluginInput())
          const originalOutput = "File written successfully"
          const output = {
            title: "Write",
            output: originalOutput,
            metadata: { filePath: ".omo\\plans\\work-plan.md" },
          }

          // when
          await hook["tool.execute.after"](
            { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
            output
          )

          // then
          expect(output.output).toBe(originalOutput)
          expect(output.output).not.toContain("DELEGATION REQUIRED")
        })

        test("should NOT append reminder when orchestrator writes inside .omo with mixed separators", async () => {
          // given
          const hook = createTestAtlasHook(createMockPluginInput())
          const originalOutput = "File written successfully"
          const output = {
            title: "Write",
            output: originalOutput,
            metadata: { filePath: ".omo\\plans/work-plan.md" },
          }

          // when
          await hook["tool.execute.after"](
            { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
            output
          )

          // then
          expect(output.output).toBe(originalOutput)
          expect(output.output).not.toContain("DELEGATION REQUIRED")
        })

        test("should NOT append reminder for absolute Windows path inside .omo\\", async () => {
          // given
          const hook = createTestAtlasHook(createMockPluginInput())
          const originalOutput = "File written successfully"
          const output = {
            title: "Write",
            output: originalOutput,
            metadata: { filePath: "C:\\Users\\test\\project\\.omo\\plans\\x.md" },
          }

          // when
          await hook["tool.execute.after"](
            { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
            output
          )

          // then
          expect(output.output).toBe(originalOutput)
          expect(output.output).not.toContain("DELEGATION REQUIRED")
        })

        test("should append reminder for Windows path outside .omo\\", async () => {
          // given
          const hook = createTestAtlasHook(createMockPluginInput())
          const output = {
            title: "Write",
            output: "File written successfully",
            metadata: { filePath: "C:\\Users\\test\\project\\src\\code.ts" },
          }

          // when
          await hook["tool.execute.after"](
            { tool: "Write", sessionID: ORCHESTRATOR_SESSION },
            output
          )

          // then
          expect(output.output).toContain("DELEGATION REQUIRED")
        })
      })
    })
  })

  describe("session.idle handler (boulder continuation)", () => {
    const MAIN_SESSION_ID = "main-session-123"

    async function flushMicrotasks(): Promise<void> {
      await Promise.resolve()
      await Promise.resolve()
    }

     beforeEach(() => {
       _resetForTesting()
       registerAgentName("atlas")
       registerAgentName("sisyphus")
        subagentSessions.clear()
        setupMessageStorage(MAIN_SESSION_ID, "atlas")
      })

    afterEach(() => {
      cleanupMessageStorage(MAIN_SESSION_ID)
      _resetForTesting()
    })

    test("should inject continuation when boulder has incomplete tasks", async () => {
      // given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should call prompt with continuation
      expect(mockInput._promptMock).toHaveBeenCalled()
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.path.id).toBe(MAIN_SESSION_ID)
      expect(callArgs.body.parts[0].text).toContain("incomplete tasks")
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("should inject continuation when idle event carries session id in info", async () => {
      // given - boulder state with incomplete plan and nested session event shape
      const planPath = join(TEST_DIR, "test-plan-info-idle.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan-info-idle",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { info: { id: MAIN_SESSION_ID } },
        },
      })

      // then - should call prompt with continuation
      expect(mockInput._promptMock).toHaveBeenCalled()
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.path.id).toBe(MAIN_SESSION_ID)
      expect(callArgs.body.parts[0].text).toContain("incomplete tasks")
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("should settle idle before injecting boulder continuation", async () => {
      // given
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput, { idleSettleMs: 50 })

      // when
      const startedAt = Date.now()
      const eventPromise = hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })
      await Promise.resolve()

      // then
      expect(mockInput._promptMock).not.toHaveBeenCalled()

      await eventPromise
      expect(Date.now() - startedAt).toBeGreaterThanOrEqual(45)
      expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
    })

    test("should not inject when no boulder state exists", async () => {
      // given - no boulder state
      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should not call prompt
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should not inject when main session is not in boulder session_ids", async () => {
      // given - boulder state exists but current (main) session is NOT in session_ids
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: ["some-other-session-id"],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - main session fires idle but is NOT in boulder's session_ids
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should NOT call prompt because session is not part of this boulder
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should not append lineage-only subagent session during idle without explicit boulder tracking", async () => {
      // given - active boulder plan with another registered session and current session tracked as subagent
      const subagentSessionID = "subagent-session-456"
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)
      subagentSessions.add(subagentSessionID)
      updateSessionAgent(subagentSessionID, "atlas")

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - subagent session goes idle before explicit tracking appends it
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: subagentSessionID },
        },
      })

      // then - lineage alone is not enough to absorb the session into boulder
      expect(readBoulderState(TEST_DIR)?.session_ids).not.toContain(subagentSessionID)
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("should inject when registered boulder session has incomplete tasks even if last agent differs", async () => {
      cleanupMessageStorage(MAIN_SESSION_ID)
      setupMessageStorage(MAIN_SESSION_ID, "hephaestus")

      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
        agent: "atlas",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      expect(mockInput._promptMock).toHaveBeenCalled()
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.path.id).toBe(MAIN_SESSION_ID)
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("should inject completion nudge when boulder plan is complete", async () => {
      // given - boulder state with complete plan
      const planPath = join(TEST_DIR, "complete-plan.md")
      writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "complete-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then
      expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
    })

    test("should inject completion nudge when mirrored worktree plan is complete even if the main repo plan is stale", async () => {
      // given
      const mainPlanPath = join(TEST_DIR, ".omo", "plans", "worktree-complete-plan.md")
      const worktreeDir = join(tmpdir(), `atlas-worktree-${randomUUID()}`)
      const worktreePlanPath = join(worktreeDir, ".omo", "plans", "worktree-complete-plan.md")
      mkdirSync(join(TEST_DIR, ".omo", "plans"), { recursive: true })
      mkdirSync(join(worktreeDir, ".omo", "plans"), { recursive: true })
      writeFileSync(mainPlanPath, "# Plan\n- [ ] Main repo task\n")
      writeFileSync(worktreePlanPath, "# Plan\n- [x] Worktree task\n")

      writeBoulderState(TEST_DIR, {
        active_plan: mainPlanPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "worktree-complete-plan",
        worktree_path: worktreeDir,
      })

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      try {
        // when
        await hook.handler({
          event: {
            type: "session.idle",
            properties: { sessionID: MAIN_SESSION_ID },
          },
        })

        // then
        expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
      } finally {
        rmSync(worktreeDir, { recursive: true, force: true })
      }
    })

    test("should skip when abort error occurred before idle", async () => {
      // given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - send abort error then idle
      await hook.handler({
        event: {
          type: "session.error",
          properties: {
            sessionID: MAIN_SESSION_ID,
            error: { name: "AbortError", message: "aborted" },
          },
        },
      })
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should not call prompt
      expect(mockInput._promptMock).not.toHaveBeenCalled()
    })

    test("#given boulder has incomplete tasks #when non-abort session error fires #then continuation injects immediately", async () => {
      // given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - a recoverable runtime error fires without waiting for idle
      await hook.handler({
        event: {
          type: "session.error",
          properties: {
            sessionID: MAIN_SESSION_ID,
            error: { name: "RuntimeError", message: "provider overloaded" },
          },
        },
      })

      // then - boulder resumes work immediately
      expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.path.id).toBe(MAIN_SESSION_ID)
      expect(callArgs.body.parts[0].text).toContain("incomplete tasks")
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("#given boulder retried a runtime error #when stale idle follows #then no delayed duplicate retry is scheduled", async () => {
      // given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const originalSetTimeout = globalThis.setTimeout
      const originalClearTimeout = globalThis.clearTimeout
      const activeTimers = new Map<ReturnType<typeof setTimeout>, number>()
      globalThis.setTimeout = ((_handler: Parameters<typeof setTimeout>[0], timeout?: number, ..._args: unknown[]) => {
        const id = originalSetTimeout(() => undefined, 0)
        activeTimers.set(id, timeout ?? 0)
        return id
      }) as typeof setTimeout
      globalThis.clearTimeout = ((id: ReturnType<typeof setTimeout>) => {
        activeTimers.delete(id)
        originalClearTimeout(id)
      }) as typeof clearTimeout

      try {
        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        // when - runtime error resumes immediately and OpenCode later emits stale idle
        await hook.handler({
          event: {
            type: "session.error",
            properties: {
              sessionID: MAIN_SESSION_ID,
              error: { name: "RuntimeError", message: "provider overloaded" },
            },
          },
        })
        await hook.handler({
          event: {
            type: "session.idle",
            properties: { sessionID: MAIN_SESSION_ID },
          },
        })

        // then - stale idle is consumed, not converted into another scheduled continuation
        const scheduledDelays = Array.from(activeTimers.values())
        expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
        expect(scheduledDelays.filter((delay) => delay >= 5_000 && delay !== DEFAULT_PROMPT_DISPATCH_TIMEOUT_MS)).toHaveLength(0)
      } finally {
        globalThis.setTimeout = originalSetTimeout
        globalThis.clearTimeout = originalClearTimeout
      }
    })

    test("#given boulder retried a runtime error #when assistant activity arrives #then next idle can continue", async () => {
      // given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const originalDateNow = Date.now
      let now = 1000
      Date.now = () => now

      try {
        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        // when - runtime error resumes immediately and then the retry run emits assistant activity
        await hook.handler({
          event: {
            type: "session.error",
            properties: {
              sessionID: MAIN_SESSION_ID,
              error: { name: "RuntimeError", message: "provider overloaded" },
            },
          },
        })
        await hook.handler({
          event: {
            type: "message.updated",
            properties: { info: { sessionID: MAIN_SESSION_ID, role: "assistant" } },
          },
        })
        now = 7000
        await hook.handler({
          event: {
            type: "session.idle",
            properties: { sessionID: MAIN_SESSION_ID },
          },
        })

        // then - assistant activity marks the following idle as real work completion
        expect(mockInput._promptMock).toHaveBeenCalledTimes(2)
      } finally {
        Date.now = originalDateNow
      }
    })

     test("should skip when background tasks are running", async () => {
       // given - boulder state with incomplete plan
       const planPath = join(TEST_DIR, "test-plan.md")
       writeFileSync(planPath, "# Plan\n- [ ] Task 1")

       const state: BoulderState = {
         active_plan: planPath,
         started_at: "2026-01-02T10:00:00Z",
         session_ids: [MAIN_SESSION_ID],
         plan_name: "test-plan",
       }
       writeBoulderState(TEST_DIR, state)

       const mockBackgroundManager = {
         getTasksByParentSession: () => [{ status: "running" }],
       }

       const mockInput = createMockPluginInput()
       const hook = createTestAtlasHook(mockInput, {
         directory: TEST_DIR,
         backgroundManager: mockBackgroundManager,
       })

       // when
       await hook.handler({
         event: {
           type: "session.idle",
           properties: { sessionID: MAIN_SESSION_ID },
         },
       })

       // then - should not call prompt
       expect(mockInput._promptMock).not.toHaveBeenCalled()
     })

     test("should skip when continuation is stopped via isContinuationStopped", async () => {
       // given - boulder state with incomplete plan
       const planPath = join(TEST_DIR, "test-plan.md")
       writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

       const state: BoulderState = {
         active_plan: planPath,
         started_at: "2026-01-02T10:00:00Z",
         session_ids: [MAIN_SESSION_ID],
         plan_name: "test-plan",
       }
       writeBoulderState(TEST_DIR, state)

       const mockInput = createMockPluginInput()
       const hook = createTestAtlasHook(mockInput, {
         directory: TEST_DIR,
         isContinuationStopped: (sessionID: string) => sessionID === MAIN_SESSION_ID,
       })

       // when
       await hook.handler({
         event: {
           type: "session.idle",
           properties: { sessionID: MAIN_SESSION_ID },
         },
       })

       // then - should not call prompt because continuation is stopped
       expect(mockInput._promptMock).not.toHaveBeenCalled()
     })

    test("should clear abort state on message.updated", async () => {
      // given - boulder with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - abort error, then message update, then idle
      await hook.handler({
        event: {
          type: "session.error",
          properties: {
            sessionID: MAIN_SESSION_ID,
            error: { name: "AbortError" },
          },
        },
      })
      await hook.handler({
        event: {
          type: "message.updated",
          properties: { info: { sessionID: MAIN_SESSION_ID, role: "user" } },
        },
      })
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should call prompt because abort state was cleared
      expect(mockInput._promptMock).toHaveBeenCalled()
    })

    test("should include plan progress in continuation prompt", async () => {
      // given - boulder state with specific progress
      const planPath = join(TEST_DIR, "progress-plan.md")
      writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2\n- [ ] Task 3\n- [ ] Task 4")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "progress-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should include progress
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.body.parts[0].text).toContain("2/4 completed")
      expect(callArgs.body.parts[0].text).toContain("2 remaining")
    })

    test("should include preferred reuse session in continuation prompt for current top-level task", async () => {
      // given - boulder state with tracked preferred session
      const planPath = join(TEST_DIR, "preferred-session-plan.md")
      writeFileSync(planPath, `# Plan

## TODOs
- [ ] 1. Implement auth flow
`)

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "preferred-session-plan",
        task_sessions: {
          "todo:1": {
            task_key: "todo:1",
            task_label: "1",
            task_title: "Implement auth flow",
            session_id: "ses_auth_flow_123",
            updated_at: "2026-01-02T10:00:00Z",
          },
        },
      })

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.body.parts[0].text).toContain("Preferred reuse session for current top-level plan task")
      expect(callArgs.body.parts[0].text).toContain("ses_auth_flow_123")
    })

    test("#given blocked-task continuation #when prompt is built #then it requires a plan edit to mark the task blocked", async () => {
      // given - boulder state with an incomplete externally blockable task
      const planPath = join(TEST_DIR, "blocked-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Wait for external credentials")

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "blocked-plan",
      })

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - prompt enforces the behavioral invariant instead of allowing text-only blocker reports
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      const promptText = callArgs.body.parts[0].text
      expect(promptText).toContain("- [~]")
      expect(promptText).toMatch(/edit the plan file/i)
      expect(promptText).toMatch(/text-only explanation.*not progress/i)
    })

    test("#given continuation emits text without tool progress #when three continuation iterations repeat #then Atlas stalls instead of looping", async () => {
      // given - boulder state with one externally blocked task that never receives a tool edit
      const planPath = join(TEST_DIR, "blocked-loop-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Wait for external approval")

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "blocked-loop-plan",
      })

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        // when - each idle represents a completed text-only continuation turn with no bash/edit/write progress
        for (let iteration = 0; iteration < 4; iteration += 1) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        // then - K=3 continuations are attempted, and the fourth idle aborts cleanly instead of injecting again
        expect(mockInput._promptMock).toHaveBeenCalledTimes(3)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("#given one plan stalls #when a different boulder plan becomes active #then Atlas continues the new plan", async () => {
      // given - a boulder plan that reaches the stalled no-tool-progress threshold
      const firstPlanPath = join(TEST_DIR, "first-blocked-loop-plan.md")
      writeFileSync(firstPlanPath, "# Plan\n- [ ] Wait for external approval")

      writeBoulderState(TEST_DIR, {
        active_plan: firstPlanPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "first-blocked-loop-plan",
      })

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        for (let iteration = 0; iteration < 4; iteration += 1) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }
        expect(mockInput._promptMock).toHaveBeenCalledTimes(3)

        const secondPlanPath = join(TEST_DIR, "second-plan.md")
        writeFileSync(secondPlanPath, "# Plan\n- [ ] Fresh task")
        writeBoulderState(TEST_DIR, {
          active_plan: secondPlanPath,
          started_at: "2026-01-02T10:10:00Z",
          session_ids: [MAIN_SESSION_ID],
          plan_name: "second-plan",
        })
        now += 6000

        // when - the same session id receives a different active plan
        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        // then - stale stall state from the previous plan does not permanently block continuation
        expect(mockInput._promptMock).toHaveBeenCalledTimes(4)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("#given continuation makes tangible tool progress #when idle repeats #then no-progress stall counter resets", async () => {
      // given - boulder state with incomplete work and a successful edit between continuation turns
      const planPath = join(TEST_DIR, "progress-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      writeBoulderState(TEST_DIR, {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "progress-plan",
      })

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        // when - the first continuation is followed by a successful edit tool result
        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await hook["tool.execute.after"](
          { tool: "edit", sessionID: MAIN_SESSION_ID, callID: "progress-edit" },
          { title: "Edited file", output: "Updated plan", metadata: { filePath: planPath } },
        )
        now += 6000

        for (let iteration = 0; iteration < 3; iteration += 1) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        // then - one progress reset plus three no-progress continuation attempts are allowed
        expect(mockInput._promptMock).toHaveBeenCalledTimes(4)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => {
       // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work)
       const planPath = join(TEST_DIR, "test-plan.md")
       writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

       const state: BoulderState = {
         active_plan: planPath,
         started_at: "2026-01-02T10:00:00Z",
         session_ids: [MAIN_SESSION_ID],
         plan_name: "test-plan",
         agent: "atlas",
       }
       writeBoulderState(TEST_DIR, state)

       // given - last agent is sisyphus (typical state right after /start-work)
       cleanupMessageStorage(MAIN_SESSION_ID)
       setupMessageStorage(MAIN_SESSION_ID, "sisyphus")

       const mockInput = createMockPluginInput()
       const hook = createTestAtlasHook(mockInput)

       // when
       await hook.handler({
         event: {
           type: "session.idle",
           properties: { sessionID: MAIN_SESSION_ID },
         },
       })

       // then - should call prompt because sisyphus is always allowed for atlas boulders
       expect(mockInput._promptMock).toHaveBeenCalled()
     })

    test("should inject when registered atlas boulder session last agent does not match", async () => {
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

       const state: BoulderState = {
         active_plan: planPath,
         started_at: "2026-01-02T10:00:00Z",
         session_ids: [MAIN_SESSION_ID],
         plan_name: "test-plan",
         agent: "atlas",
       }
       writeBoulderState(TEST_DIR, state)

       cleanupMessageStorage(MAIN_SESSION_ID)
       setupMessageStorage(MAIN_SESSION_ID, "hephaestus")

       const mockInput = createMockPluginInput()
       const hook = createTestAtlasHook(mockInput)

      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      expect(mockInput._promptMock).toHaveBeenCalled()
    })

     test("should inject when last agent matches boulder agent even if non-Atlas", async () => {
       // given - boulder state expects sisyphus and last agent is sisyphus
       const planPath = join(TEST_DIR, "test-plan.md")
       writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

       const state: BoulderState = {
         active_plan: planPath,
         started_at: "2026-01-02T10:00:00Z",
         session_ids: [MAIN_SESSION_ID],
         plan_name: "test-plan",
         agent: "sisyphus",
       }
       writeBoulderState(TEST_DIR, state)

       cleanupMessageStorage(MAIN_SESSION_ID)
       setupMessageStorage(MAIN_SESSION_ID, "sisyphus")

       const mockInput = createMockPluginInput()
       const hook = createTestAtlasHook(mockInput)

       // when
       await hook.handler({
         event: {
           type: "session.idle",
           properties: { sessionID: MAIN_SESSION_ID },
         },
       })

       // then - should call prompt for sisyphus
       expect(mockInput._promptMock).toHaveBeenCalled()
       const callArgs = mockInput._promptMock.mock.calls[0][0]
       expect(callArgs.body.agent).toBe("sisyphus")
     })

    test("should preserve display-name agent in continuation prompt when boulder agent uses display form", async () => {
      // given - boulder state uses display-form agent name
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
        agent: "Atlas - Plan Executor",
      }
      writeBoulderState(TEST_DIR, state)
      registerAgentName("Atlas - Plan Executor")

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then
      expect(mockInput._promptMock).toHaveBeenCalled()
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.body.agent).toBe("Atlas - Plan Executor")
      expect(callArgs.body.agent).not.toBe("atlas")
    })

    test("#given boulder agent registered with ZWSP sort prefix #when continuation injects #then promptAsync receives display name without ZWSP", async () => {
      // given - OpenCode TUI registers agent names with leading ZWSP for sort ordering
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
        agent: "\u200B\u200BAtlas - Plan Executor",
      }
      writeBoulderState(TEST_DIR, state)
      registerAgentName("\u200B\u200BAtlas - Plan Executor")

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then
      expect(mockInput._promptMock).toHaveBeenCalled()
      const callArgs = mockInput._promptMock.mock.calls[0][0]
      expect(callArgs.body.agent).toBe("Atlas - Plan Executor")
      expect(callArgs.body.agent).not.toContain("\u200B")
    })

    test("should debounce rapid continuation injections (prevent infinite loop)", async () => {
      // given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - fire multiple idle events in rapid succession (simulating infinite loop bug)
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should only call prompt ONCE due to debouncing
      expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
    })

    test("should stop continuation after 10 consecutive prompt failures (issue #1355)", async () => {
      //#given - boulder state with incomplete plan and prompt always fails
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
      const mockInput = createMockPluginInput({ promptMock })
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        //#when - idle fires repeatedly, past cooldown each time
        for (let i = 0; i < 10; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        //#then - should attempt only 10 times, then disable continuation
        expect(promptMock).toHaveBeenCalledTimes(10)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should reset prompt failure counter on success and only stop after 10 consecutive failures", async () => {
      //#given - boulder state with incomplete plan
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
      promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
      promptMock.mockImplementationOnce(() => Promise.resolve())

      const mockInput = createMockPluginInput({ promptMock })
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        //#when - fail, succeed (reset), then fail 10 times (disable), then attempt again
        for (let i = 0; i < 13; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        //#then - 12 prompt attempts; 13th idle is skipped after 10 consecutive failures
        expect(promptMock).toHaveBeenCalledTimes(12)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should keep skipping continuation during 5-minute backoff after 10 consecutive failures", async () => {
      //#given - boulder state with incomplete plan and prompt always fails
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
      const mockInput = createMockPluginInput({ promptMock })
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        //#when - 11th idle occurs inside 5-minute backoff window
        for (let i = 0; i < 10; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        now += 60000

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        //#then - 11th attempt should still be skipped
        expect(promptMock).toHaveBeenCalledTimes(10)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should retry continuation after 5-minute backoff expires following 10 consecutive failures", async () => {
      //#given - boulder state with incomplete plan and prompt always fails
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
      const mockInput = createMockPluginInput({ promptMock })
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        //#when - 11th idle occurs after 5+ minutes
        for (let i = 0; i < 10; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        now += 300000

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        //#then - 11th attempt should run after backoff expiration
        expect(promptMock).toHaveBeenCalledTimes(11)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should reset prompt failure counter after successful retry beyond backoff window", async () => {
      //#given - boulder state with incomplete plan and success on first retry after backoff
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const promptMock = mock((): Promise<void> => Promise.reject(new Error("Bad Request")))
      for (let i = 0; i < 10; i++) {
        promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request")))
      }
      promptMock.mockImplementationOnce(() => Promise.resolve(undefined))
      const mockInput = createMockPluginInput({ promptMock })
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        //#when - fail 10 times, recover after backoff with success, then fail 10 times again
        for (let i = 0; i < 10; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        now += 300000

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()
        now += 6000

        for (let i = 0; i < 10; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        //#then - success retry resets counter, so 10 additional failures are allowed before skip
        expect(promptMock).toHaveBeenCalledTimes(21)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should reset continuation failure state on session.compacted event", async () => {
      //#given - boulder state with incomplete plan and prompt always fails
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const promptMock = mock(() => Promise.reject(new Error("Bad Request")))
      const mockInput = createMockPluginInput({ promptMock })
      const hook = createTestAtlasHook(mockInput)

      const originalDateNow = Date.now
      let now = 0
      Date.now = () => now

      try {
        //#when - 10 failures disable continuation, then compaction resets it
        for (let i = 0; i < 10; i++) {
          await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
          await flushMicrotasks()
          now += 6000
        }

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        await hook.handler({ event: { type: "session.compacted", properties: { sessionID: MAIN_SESSION_ID } } })
        now += 6000

        await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } })
        await flushMicrotasks()

        //#then - 10 attempts + 1 after compaction (11 total)
        expect(promptMock).toHaveBeenCalledTimes(11)
      } finally {
        Date.now = originalDateNow
      }
    })

    test("should cleanup on session.deleted", async () => {
      // given - boulder state
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
      }
      writeBoulderState(TEST_DIR, state)

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when - create abort state then delete
      await hook.handler({
        event: {
          type: "session.error",
          properties: {
            sessionID: MAIN_SESSION_ID,
            error: { name: "AbortError" },
          },
        },
      })
      await hook.handler({
        event: {
          type: "session.deleted",
          properties: { info: { id: MAIN_SESSION_ID } },
        },
      })

      // Re-create boulder after deletion
      writeBoulderState(TEST_DIR, state)

      // Trigger idle - should inject because state was cleaned up
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should call prompt because session state was cleaned
      expect(mockInput._promptMock).toHaveBeenCalled()
    })

    test("should inject when session agent was updated to atlas by start-work even if message storage agent differs", async () => {
      // given - boulder targets atlas, but nearest stored message still says hephaestus
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

      const state: BoulderState = {
        active_plan: planPath,
        started_at: "2026-01-02T10:00:00Z",
        session_ids: [MAIN_SESSION_ID],
        plan_name: "test-plan",
        agent: "atlas",
      }
      writeBoulderState(TEST_DIR, state)

      cleanupMessageStorage(MAIN_SESSION_ID)
      setupMessageStorage(MAIN_SESSION_ID, "hephaestus")
      updateSessionAgent(MAIN_SESSION_ID, "atlas")

      const mockInput = createMockPluginInput()
      const hook = createTestAtlasHook(mockInput)

      // when
      await hook.handler({
        event: {
          type: "session.idle",
          properties: { sessionID: MAIN_SESSION_ID },
        },
      })

      // then - should continue because start-work updated session agent to atlas
      expect(mockInput._promptMock).toHaveBeenCalled()
    })

    describe("delayed retry timer (abort-stuck fix)", () => {
      const capturedTimers = new Map<ReturnType<typeof setTimeout>, { callback: () => void | Promise<void>; cleared: boolean }>()
      const originalSetTimeout = globalThis.setTimeout
      const originalClearTimeout = globalThis.clearTimeout
      const originalDateNow = Date.now
      let fakeNow = 0

      beforeEach(() => {
        capturedTimers.clear()
        fakeNow = 10000
        Date.now = () => fakeNow

        globalThis.setTimeout = ((callback: Parameters<typeof setTimeout>[0], delay?: number, ...args: unknown[]) => {
          const normalized = typeof delay === "number" ? delay : 0
          if (normalized >= 5000 && normalized !== DEFAULT_PROMPT_DISPATCH_TIMEOUT_MS) {
            const timerID = originalSetTimeout(() => undefined, 0)
            const capturedCallback = typeof callback === "function"
              ? () => callback(...args)
              : () => undefined
            capturedTimers.set(timerID, { callback: capturedCallback, cleared: false })
            return timerID
          }
          return typeof callback === "function"
            ? originalSetTimeout(callback, delay, ...args)
            : originalSetTimeout(() => undefined, delay)
        }) as typeof setTimeout

        globalThis.clearTimeout = ((id?: ReturnType<typeof setTimeout>) => {
          const timerEntry = id ? capturedTimers.get(id) : undefined
          if (timerEntry) {
            timerEntry.cleared = true
            capturedTimers.delete(id)
            return
          }
          if (id !== undefined) {
            originalClearTimeout(id)
          }
        }) as typeof clearTimeout
      })

      afterEach(() => {
        globalThis.setTimeout = originalSetTimeout
        globalThis.clearTimeout = originalClearTimeout
        Date.now = originalDateNow
      })

      async function firePendingTimers(): Promise<void> {
        for (const [id, entry] of capturedTimers) {
          if (!entry.cleared) {
            capturedTimers.delete(id)
            fakeNow += 6000
            await entry.callback()
          }
        }
        await flushMicrotasks()
      }

      test("should schedule delayed retry when cooldown blocks idle for incomplete boulder", async () => {
        // given - boulder with incomplete plan
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: [MAIN_SESSION_ID],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        // when - first idle injects, second idle within cooldown schedules retry timer
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })

        // then - fire pending timer and verify retry
        await firePendingTimers()
        expect(mockInput._promptMock).toHaveBeenCalledTimes(2)
      })

      test("should not schedule duplicate retry timers for rapid idle events", async () => {
        // given - boulder with incomplete plan
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: [MAIN_SESSION_ID],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        // when - first idle injects, then 3 rapid idles within cooldown
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })

        // then - only one retry fires despite multiple cooldown-blocked idles
        await firePendingTimers()
        expect(mockInput._promptMock).toHaveBeenCalledTimes(2)
      })

      test("should not retry if plan completes before timer fires", async () => {
        // given - boulder with incomplete plan
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: [MAIN_SESSION_ID],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        // when - first idle injects, second schedules retry, then plan completes before timer fires
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })

        writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2")

        // then - retry sees complete plan and bails out
        await firePendingTimers()
        expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
      })

      test("should cleanup pending retry timer on session.deleted", async () => {
        // given - boulder with incomplete plan, schedule retry timer
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: [MAIN_SESSION_ID],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })

        // when - delete session before timer fires
        await hook.handler({
          event: { type: "session.deleted", properties: { info: { id: MAIN_SESSION_ID } } },
        })

        // then - timer was cleared, prompt called only once
        await firePendingTimers()
        expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
      })

      test("should cleanup pending retry timer on session.compacted", async () => {
        // given - boulder with incomplete plan, schedule retry timer
        const planPath = join(TEST_DIR, "test-plan.md")
        writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")

        const state: BoulderState = {
          active_plan: planPath,
          started_at: "2026-01-02T10:00:00Z",
          session_ids: [MAIN_SESSION_ID],
          plan_name: "test-plan",
        }
        writeBoulderState(TEST_DIR, state)

        const mockInput = createMockPluginInput()
        const hook = createTestAtlasHook(mockInput)

        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })
        await hook.handler({
          event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } },
        })

        // when - compact session before timer fires
        await hook.handler({
          event: { type: "session.compacted", properties: { sessionID: MAIN_SESSION_ID } },
        })

        // then - timer was cleared, prompt called only once
        await firePendingTimers()
        expect(mockInput._promptMock).toHaveBeenCalledTimes(1)
      })
    })
  })
})