scripts

2026-06-02 06:16:48 +02:00 · 2026-05-15 17:26:55 -04:00
parent 816f5e526b
commit 3739eee8cf
8 changed files with 645 additions and 0 deletions
@@ -49,6 +49,21 @@ export interface SimulationMcpServer {

 const DefaultRemotePort = 43110
 const MaxPortAttempts = 100
+// Reserved id for the local instance: selectedTargets() handles this id locally
+// instead of looking it up in the remoteInstances registry.
+const MasterInstanceID = "master"
+
+/** Address record for one simulation MCP endpoint, as stored in `remoteInstances` and listed by `instances()`. */
+interface RemoteInstance {
+  /** Lookup key; discoverInstances() generates it as `simulation-<port>`. */
+  readonly id: string
+  /** Localhost port the endpoint was found on. */
+  readonly port: number
+  /** Full MCP endpoint URL, built by childURL() from the port. */
+  readonly url: string
+}
+
+/**
+ * The parts of a JSON-RPC reply that mcpRequest() and jsonRpcError() read.
+ * Every field is optional because the payload comes from another process and is only cast, not validated.
+ */
+interface JsonRpcResponse {
+  /** Value handed back to the caller when no `error` is present. */
+  readonly result?: unknown
+  /** When set, mcpRequest() throws instead of returning `result`. */
+  readonly error?: {
+    /** Used in the fallback error text when `message` is missing. */
+    readonly code?: number
+    /** Preferred error text. */
+    readonly message?: string
+  }
+}

 type RenderBuffer = {
  readonly width: number
@@ -60,6 +75,19 @@ const decoder = new TextDecoder()

 const ActionSchema = z.discriminatedUnion("type", [
  z.object({ type: z.literal("typeText"), text: z.string() }),
+  z.object({
+    type: z.literal("pressKey"),
+    key: z.string(),
+    modifiers: z
+      .object({
+        ctrl: z.boolean().optional(),
+        shift: z.boolean().optional(),
+        meta: z.boolean().optional(),
+        super: z.boolean().optional(),
+        hyper: z.boolean().optional(),
+      })
+      .optional(),
+  }),
  z.object({ type: z.literal("pressEnter") }),
  z.object({ type: z.literal("pressArrow"), direction: z.enum(["up", "down", "left", "right"]) }),
  z.object({ type: z.literal("focus"), target: z.number() }),
@@ -115,6 +143,17 @@ const LlmScriptSchema = z.object({
  finish: z.enum(["stop", "tool-calls", "error", "length", "unknown"]).optional(),
 })

+// One step of a script file: any UI action accepted by ActionSchema, plus three
+// script-only steps. `wait.ms` is optional and limited to 0..30000; runScript()
+// falls back to 1000 ms when it is omitted.
+const ScriptActionSchema = z.union([
+  ActionSchema,
+  z.object({ type: z.literal("writeFile"), path: z.string(), content: FileContentSchema }),
+  z.object({ type: z.literal("enqueueLLM"), scripts: z.array(LlmScriptSchema) }),
+  z.object({ type: z.literal("wait"), ms: z.number().min(0).max(30_000).optional() }),
+])
+
+// A script file is either a bare array of steps or an object with an `actions` array.
+const ScriptSchema = z.union([z.array(ScriptActionSchema), z.object({ actions: z.array(ScriptActionSchema) })])
+// Target selector: one instance id, a non-empty list of ids, or "all".
+// NOTE(review): z.string() already accepts "all", so the literal branch only documents intent;
+// the actual expansion of "all" happens in selectedTargets().
+const TargetSchema = z.union([z.string(), z.array(z.string()).min(1), z.literal("all")])
+// Module-level registry of child instances keyed by id; discoverInstances() adds and removes entries.
+const remoteInstances = new Map<string, RemoteInstance>()
+
 function currentBuffer(renderer: CliRenderer): RenderBuffer {
  return Reflect.get(renderer, "currentRenderBuffer") as RenderBuffer
 }
@@ -125,6 +164,18 @@ function remotePort() {
  return DefaultRemotePort
 }

+/** Reports whether `OPENCODE_SIMULATION_MCP_MASTER` is set to exactly "1" or "true". */
+function masterEnabled() {
+  const flag = process.env.OPENCODE_SIMULATION_MCP_MASTER
+  return flag === "1" || flag === "true"
+}
+
+/** Builds the loopback MCP endpoint URL for the given port. */
+function childURL(port: number) {
+  const endpoint = `http://127.0.0.1:${port}/mcp`
+  return endpoint
+}
+
+/**
+ * Produces the text for an Error raised from a JSON-RPC reply.
+ * Prefers the reply's own message; otherwise falls back to a generic text,
+ * with the error code appended when one is present.
+ */
+function jsonRpcError(response: JsonRpcResponse) {
+  const failure = response.error
+  if (failure?.message != null) return failure.message
+  if (failure?.code === undefined) return "MCP request failed"
+  return `MCP request failed: ${failure.code}`
+}
+
 function isPortUnavailable(error: unknown) {
  const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase()
  return message.includes("eaddrinuse") || message.includes("address already in use") || message.includes(" in use")
@@ -213,6 +264,126 @@ async function control(options: Options, method: string, pathname: string, body?
  throw new Error(typeof data?.error === "string" ? data.error : `Simulation control request failed: ${response.status}`)
 }

+/**
+ * Extracts the JSON-RPC response from a `text/event-stream` body.
+ * Events are separated by a blank line; the `data:` lines of each event are joined and parsed,
+ * and the first payload carrying `result` or `error` is returned.
+ */
+function sseJsonRpcResponse(body: string): JsonRpcResponse | undefined {
+  for (const event of body.split(/\r?\n\r?\n/)) {
+    const payload = event
+      .split(/\r?\n/)
+      .filter((line) => line.startsWith("data:"))
+      .map((line) => line.slice(5).replace(/^ /, ""))
+      .join("\n")
+    if (!payload) continue
+    try {
+      const message: unknown = JSON.parse(payload)
+      if (typeof message === "object" && message !== null && ("result" in message || "error" in message)) {
+        return message as JsonRpcResponse
+      }
+    } catch {
+      // Payload is not JSON; keep scanning the remaining events.
+    }
+  }
+  return undefined
+}
+
+/**
+ * Sends one JSON-RPC message to an MCP endpoint over HTTP POST.
+ *
+ * Methods named `notifications/...` are sent as JSON-RPC notifications: no `id`, and no reply
+ * body is read. All other methods are sent as requests and their reply is accepted either as
+ * plain JSON or as an event stream, matching the two media types listed in the `accept` header.
+ *
+ * @param url Endpoint to post to.
+ * @param method JSON-RPC method name.
+ * @param params JSON-RPC params, serialized as-is.
+ * @param timeout Abort the HTTP call after this many milliseconds (default 500).
+ * @returns The reply's `result`, or `undefined` for notifications.
+ * @throws Error on a non-2xx status, on a reply that carries `error`, or when no reply message is found.
+ */
+async function mcpRequest(url: string, method: string, params: unknown, timeout = 500) {
+  const notification = method.startsWith("notifications/")
+  // A JSON-RPC message with an id is a request; notifications must not carry one.
+  const message = notification
+    ? { jsonrpc: "2.0", method, params }
+    : { jsonrpc: "2.0", id: crypto.randomUUID(), method, params }
+  const response = await fetch(url, {
+    method: "POST",
+    headers: {
+      "content-type": "application/json",
+      accept: "application/json, text/event-stream",
+    },
+    body: JSON.stringify(message),
+    signal: AbortSignal.timeout(timeout),
+  })
+  if (!response.ok) throw new Error(`MCP request failed: HTTP ${response.status}`)
+  if (notification) {
+    // Nothing to parse for a notification; release the body if the server sent one.
+    await response.body?.cancel()
+    return undefined
+  }
+  const contentType = response.headers.get("content-type") ?? ""
+  const data = contentType.includes("text/event-stream")
+    ? sseJsonRpcResponse(await response.text())
+    : ((await response.json()) as JsonRpcResponse | null)
+  if (!data) throw new Error("MCP request failed: empty response")
+  if (data.error) throw new Error(jsonRpcError(data))
+  return data.result
+}
+
+/**
+ * Performs the MCP handshake against `url`: an `initialize` call followed by
+ * `notifications/initialized`. A failure of the first call propagates; a failure
+ * of the follow-up message is ignored.
+ *
+ * @param url Endpoint of the child server.
+ * @param timeout Optional per-call timeout in milliseconds, forwarded to mcpRequest().
+ */
+async function initializeChild(url: string, timeout?: number) {
+  const handshake = {
+    protocolVersion: "2025-06-18",
+    capabilities: {},
+    clientInfo: { name: "opencode-simulation-master", version: InstallationVersion },
+  }
+  await mcpRequest(url, "initialize", handshake, timeout)
+  try {
+    await mcpRequest(url, "notifications/initialized", {}, timeout)
+  } catch {
+    // Best effort: the handshake is considered done once `initialize` succeeded.
+  }
+}
+
+/**
+ * Invokes tool `name` with `args` on a child instance.
+ * Each call first repeats the handshake (2 s per-call timeout) and then issues
+ * `tools/call` with a 30 s timeout.
+ *
+ * @returns The `result` of the child's JSON-RPC reply.
+ */
+async function childToolCall(instance: RemoteInstance, name: string, args: unknown) {
+  const { url } = instance
+  await initializeChild(url, 2_000)
+  const result = await mcpRequest(url, "tools/call", { name, arguments: args }, 30_000)
+  return result
+}
+
+/**
+ * Lists every known instance: the local one first (under the reserved master id),
+ * followed by the registered remote instances in registry order.
+ */
+function instances(): RemoteInstance[] {
+  // Resolve the port once so the entry's `port` and `url` can never disagree.
+  const port = remotePort()
+  const master: RemoteInstance = { id: MasterInstanceID, port, url: childURL(port) }
+  return [master, ...remoteInstances.values()]
+}
+
+/**
+ * Resolves a target selector into concrete targets.
+ * "all" expands to every id from instances(); a single id is treated as a one-element list.
+ * The master id maps to a local target, any other id must exist in `remoteInstances`.
+ *
+ * @throws Error when an id is neither the master id nor a registered remote instance.
+ */
+function selectedTargets(target: z.infer<typeof TargetSchema>) {
+  let ids: string[]
+  if (target === "all") ids = instances().map((instance) => instance.id)
+  else if (Array.isArray(target)) ids = target
+  else ids = [target]
+
+  return ids.map((id) => {
+    if (id === MasterInstanceID) return { id, local: true as const }
+    const remote = remoteInstances.get(id)
+    if (remote === undefined) throw new Error(`Unknown simulation instance: ${id}`)
+    return { id, local: false as const, remote }
+  })
+}
+
+/**
+ * Probes sequential localhost ports for child simulation MCP servers and updates
+ * the `remoteInstances` registry with the outcome of every probe.
+ *
+ * @param input.startPort First port to probe; defaults to the port after this server's own.
+ * @param input.maxPorts Maximum number of ports to consider; defaults to 30.
+ * @param input.consecutiveFailures Stop once this many probes in a row have failed; defaults to 3.
+ * @returns All known instances, the instances found by this scan, and the effective scan settings.
+ */
+async function discoverInstances(input: { startPort?: number; maxPorts?: number; consecutiveFailures?: number } = {}) {
+  // Resolve our own port once so the default start and the self-skip below always agree.
+  const ownPort = remotePort()
+  const startPort = input.startPort ?? ownPort + 1
+  const maxPorts = input.maxPorts ?? 30
+  const consecutiveFailures = input.consecutiveFailures ?? 3
+  const highestPort = 65_535
+  let failures = 0
+  const found: RemoteInstance[] = []
+
+  for (let offset = 0; offset < maxPorts && failures < consecutiveFailures; offset++) {
+    const port = startPort + offset
+    // Nothing can listen beyond the TCP port range, so stop instead of building invalid URLs.
+    if (port > highestPort) break
+    // Never register this server as its own child.
+    if (port === ownPort) continue
+    const instance = { id: `simulation-${port}`, port, url: childURL(port) }
+    try {
+      // Handshake with mcpRequest's default timeout first, then confirm the peer
+      // answers a simulation tool call (childToolCall repeats the handshake itself).
+      await initializeChild(instance.url)
+      await childToolCall(instance, "simulation_control_snapshot", {})
+      remoteInstances.set(instance.id, instance)
+      found.push(instance)
+      failures = 0
+    } catch {
+      // Any failure counts as "no child on this port"; drop a previously registered entry for it.
+      failures++
+      remoteInstances.delete(instance.id)
+    }
+  }
+
+  return { instances: instances(), discovered: found, scanned: { startPort, maxPorts, consecutiveFailures } }
+}
+
+/**
+ * Loads the JSON script at `file`, validates it with `ScriptSchema`, and plays its steps in order.
+ *
+ * `writeFile` and `enqueueLLM` steps go through the simulation control endpoints, `wait` sleeps
+ * (1000 ms when `ms` is omitted) and then renders once, and every other step is executed as a UI
+ * action on the current harness.
+ *
+ * @returns The file path, the number of steps, per-kind counters, and a snapshot taken after the run.
+ * @throws When the file cannot be read or parsed, fails validation, or any step fails.
+ */
+async function runScript(options: Options, file: string) {
+  const script = ScriptSchema.parse(await Bun.file(file).json())
+  const steps = Array.isArray(script) ? script : script.actions
+  const counts = { uiActions: 0, fileWrites: 0, llmScriptsQueued: 0, waits: 0 }
+
+  for (const step of steps) {
+    switch (step.type) {
+      case "writeFile":
+        await control(options, "POST", "/experimental/simulation/filesystem/write", {
+          path: step.path,
+          content: step.content,
+        })
+        counts.fileWrites += 1
+        break
+      case "enqueueLLM":
+        await control(options, "POST", "/experimental/simulation/llm/enqueue", { scripts: step.scripts })
+        // Counts queued scripts, not enqueue steps.
+        counts.llmScriptsQueued += step.scripts.length
+        break
+      case "wait":
+        await new Promise((resolve) => setTimeout(resolve, step.ms ?? 1_000))
+        await current(options).harness.renderOnce()
+        counts.waits += 1
+        break
+      default:
+        await SimulationActions.execute(current(options).harness, step)
+        counts.uiActions += 1
+    }
+  }
+
+  return { file, actions: steps.length, ...counts, snapshot: snapshot(options) }
+}
+
+/**
+ * Runs an operation on each selected target, one after another.
+ * Local targets use `local`, remote targets use `remote` with their registry entry.
+ * The first failure aborts the run and propagates to the caller.
+ *
+ * @returns One `{ id, result }` entry per target, plus a snapshot of the local instance taken afterwards.
+ */
+async function runOnTargets<A>(
+  options: Options,
+  target: z.infer<typeof TargetSchema>,
+  local: () => Promise<A>,
+  remote: (instance: RemoteInstance) => Promise<unknown>,
+) {
+  const results: Array<{ id: string; result: unknown }> = []
+  for (const entry of selectedTargets(target)) {
+    const result = entry.local ? await local() : await remote(entry.remote)
+    results.push({ id: entry.id, result })
+  }
+  return { results, snapshot: snapshot(options) }
+}
+
 function createServer(options: Options) {
  const server = new McpServer(
    { name: "opencode-simulation", version: InstallationVersion },
@@ -282,6 +453,14 @@ function createServer(options: Options) {
      return toolResult(snapshot(options))
    },
  )
+  server.registerTool(
+    "simulation_script_run",
+    {
+      description: "Run a JSON simulation script from a host filesystem path.",
+      inputSchema: z.object({ path: z.string() }),
+    },
+    async (input) => toolResult(await runScript(options, input.path)),
+  )

  if ("runtime" in options) {
    server.registerTool("simulation_restart", { description: "Restart the simulated TUI and backend while keeping MCP alive." }, async () =>
@@ -328,6 +507,91 @@ function createServer(options: Options) {
    toolResult(await control(options, "GET", "/experimental/simulation/snapshot")),
  )

+  if (masterEnabled()) {
+    // Registers the discovery tool. The numeric inputs are restricted to values the scan can
+    // actually use: whole numbers, a start port inside the TCP range, and at least one port
+    // and one tolerated failure (zero or negative values would make the scan a silent no-op).
+    server.registerTool(
+      "simulation_instances_discover",
+      {
+        description: "Discover child simulation MCP servers on sequential localhost ports.",
+        inputSchema: z.object({
+          startPort: z.number().int().min(1).max(65_535).optional(),
+          maxPorts: z.number().int().min(1).optional(),
+          consecutiveFailures: z.number().int().min(1).optional(),
+        }),
+      },
+      async (input) => toolResult(await discoverInstances(input)),
+    )
+    server.registerTool("simulation_instances_list", { description: "List known simulation instances." }, () =>
+      toolResult({ instances: instances() }),
+    )
+    server.registerTool(
+      "simulation_instances_action_execute",
+      {
+        description: "Execute one UI action on one or more simulation instances.",
+        inputSchema: z.object({ target: TargetSchema, action: ActionSchema }),
+      },
+      async (input) =>
+        toolResult(
+          await runOnTargets(
+            options,
+            input.target,
+            async () => {
+              await SimulationActions.execute(current(options).harness, input.action)
+              return snapshot(options)
+            },
+            (instance) => childToolCall(instance, "simulation_action_execute", { action: input.action }),
+          ),
+        ),
+    )
+    server.registerTool(
+      "simulation_instances_filesystem_write",
+      {
+        description: "Write one file on one or more simulation instances.",
+        inputSchema: z.object({ target: TargetSchema, path: z.string(), content: FileContentSchema }),
+      },
+      async (input) =>
+        toolResult(
+          await runOnTargets(
+            options,
+            input.target,
+            () => control(options, "POST", "/experimental/simulation/filesystem/write", { path: input.path, content: input.content }),
+            (instance) => childToolCall(instance, "simulation_control_filesystem_write", { path: input.path, content: input.content }),
+          ),
+        ),
+    )
+    server.registerTool(
+      "simulation_instances_llm_enqueue",
+      {
+        description: "Queue LLM scripts on one or more simulation instances.",
+        inputSchema: z.object({ target: TargetSchema, scripts: z.array(LlmScriptSchema) }),
+      },
+      async (input) =>
+        toolResult(
+          await runOnTargets(
+            options,
+            input.target,
+            () => control(options, "POST", "/experimental/simulation/llm/enqueue", { scripts: input.scripts }),
+            (instance) => childToolCall(instance, "simulation_control_llm_enqueue", { scripts: input.scripts }),
+          ),
+        ),
+    )
+    server.registerTool(
+      "simulation_instances_script_run",
+      {
+        description: "Run a JSON simulation script on one or more simulation instances.",
+        inputSchema: z.object({ target: TargetSchema, path: z.string() }),
+      },
+      async (input) =>
+        toolResult(
+          await runOnTargets(
+            options,
+            input.target,
+            () => runScript(options, input.path),
+            (instance) => childToolCall(instance, "simulation_script_run", { path: input.path }),
+          ),
+        ),
+    )
+  }
+
  return server
 }

@@ -2,10 +2,19 @@ import type { CliRenderer, Renderable } from "@opentui/core"

 export interface MockInput {
  readonly typeText: (text: string) => Promise<void>
+  /** Presses one named key, optionally with modifier flags; returns nothing, unlike the promise-returning typeText. */
+  readonly pressKey: (key: string, modifiers?: KeyModifiers) => void
  readonly pressEnter: () => void
  readonly pressArrow: (direction: "up" | "down" | "left" | "right") => void
 }

+/**
+ * Optional modifier flags passed alongside a key to `MockInput.pressKey` and carried by the
+ * `pressKey` action. Every flag may be omitted.
+ * NOTE(review): an omitted flag presumably means "not held" — confirm against the mock input implementation.
+ */
+export interface KeyModifiers {
+  readonly ctrl?: boolean
+  readonly shift?: boolean
+  readonly meta?: boolean
+  readonly super?: boolean
+  readonly hyper?: boolean
+}
+
 export interface MockMouse {
  readonly click: (x: number, y: number) => Promise<void>
 }
@@ -32,6 +41,7 @@ export interface Element {

 export type Action =
  | { readonly type: "typeText"; readonly text: string }
+  | { readonly type: "pressKey"; readonly key: string; readonly modifiers?: KeyModifiers }
  | { readonly type: "pressEnter" }
  | { readonly type: "pressArrow"; readonly direction: "up" | "down" | "left" | "right" }
  | { readonly type: "focus"; readonly target: number }
@@ -108,6 +118,9 @@ export async function execute(harness: Harness, action: Action) {
    case "typeText":
      await harness.mockInput.typeText(action.text)
      break
+    case "pressKey":
+      harness.mockInput.pressKey(action.key, action.modifiers)
+      break
    case "pressEnter":
      harness.mockInput.pressEnter()
      break
@@ -0,0 +1,65 @@
+{
+  "actions": [
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "README.md", "content": "# Demo App\n\nA small simulated app for orientation testing.\n" },
+    { "type": "writeFile", "path": "src/app.ts", "content": "export const appName = 'demo-app'\nexport function boot() { return appName }\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Inspect the project files and produce a concise orientation." }, { "type": "text", "content": "This is a compact Bun/TypeScript demo app. The main entrypoint is `src/app.ts`, which exports `appName` and `boot()`. The README provides the human summary; the next best step is to add a small test around `boot()`." }]], "usage": { "inputTokens": 240, "outputTokens": 72, "totalTokens": 312 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "What is in this project? Give me a quick orientation." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1200 },
+    { "type": "typeText", "text": "!ls" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "!cat README.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/hide thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "The concise summary: a README and a TypeScript entrypoint define a tiny demo app; add a boot test before expanding behavior." }]], "usage": { "inputTokens": 150, "outputTokens": 28, "totalTokens": 178 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Summarize that without reasoning." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "docs/architecture.json", "content": "{\n  \"entrypoints\": [\"src/app.ts\"],\n  \"runtime\": \"bun\",\n  \"features\": [{\"name\":\"boot\",\"status\":\"implemented\"}],\n  \"risks\": []\n}\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Read structured architecture metadata and explain the fields." }, { "type": "text", "content": "`docs/architecture.json` records one entrypoint, Bun as the runtime, one implemented `boot` feature, and no current risks. It is a minimal baseline rather than a complete architecture document." }]], "usage": { "inputTokens": 260, "outputTokens": 54, "totalTokens": 314 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Read the architecture metadata and explain it." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "typeText", "text": "!cat docs/architecture.json" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "docs/roadmap.md", "content": "# Roadmap\n\n| Milestone | Scope | Risk |\n| --- | --- | --- |\n| M1 | Add boot test | Low |\n| M2 | Document CLI usage | Medium |\n| M3 | Add config loading | Medium |\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Roadmap order: M1 first because testing existing behavior is low risk; M2 second for usability; M3 last because config loading adds new behavior and edge cases." }]], "usage": { "inputTokens": 220, "outputTokens": 41, "totalTokens": 261 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Summarize the roadmap table and recommend an order." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat docs/roadmap.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "test/app.test.ts", "content": "import { expect, test } from 'bun:test'\nimport { boot } from '../src/app'\n\ntest('boot returns the app name', () => {\n  expect(boot()).toBe('demo-app')\n})\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "The generated test validates the boot contract exactly." }, { "type": "text", "content": "The generated `test/app.test.ts` verifies that `boot()` returns `demo-app`. Run `bun test test/app.test.ts` to validate the current behavior." }]], "usage": { "inputTokens": 200, "outputTokens": 37, "totalTokens": 237 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "What does the generated test cover, and how would you run it?" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "typeText", "text": "!cat test/app.test.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/show thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Synthesize README, architecture metadata, roadmap, and test coverage." }, { "type": "text", "content": "Final orientation: this is a minimal Bun/TypeScript app with `src/app.ts` as entrypoint, architecture metadata in `docs/architecture.json`, a roadmap beginning with test coverage, and `test/app.test.ts` verifying `boot()`." }]], "usage": { "inputTokens": 260, "outputTokens": 54, "totalTokens": 314 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Give a final project orientation including the new metadata and test." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!ls docs && ls test" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 800 }
+  ]
+}
@@ -0,0 +1,65 @@
+{
+  "actions": [
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "src/counter.ts", "content": "export function increment(value: number) {\n  return value + 2\n}\n" },
+    { "type": "writeFile", "path": "test/counter.test.ts", "content": "import { expect, test } from 'bun:test'\nimport { increment } from '../src/counter'\n\ntest('increments by one', () => {\n  expect(increment(1)).toBe(2)\n})\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Counter adds two, while test expects plus one." }, { "type": "text", "content": "The bug is in `src/counter.ts`: `increment` returns `value + 2`, but the test expects `value + 1`. The smallest fix is changing that return expression only." }]], "usage": { "inputTokens": 310, "outputTokens": 52, "totalTokens": 362 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "The counter test fails. Triage the bug and suggest the smallest fix." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1200 },
+    { "type": "typeText", "text": "!cat src/counter.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "!cat test/counter.test.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/hide thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Minimal fix: change `return value + 2` to `return value + 1` in `src/counter.ts`." }]], "usage": { "inputTokens": 120, "outputTokens": 22, "totalTokens": 142 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Restate the fix as a one-line recommendation." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "logs/test-output.txt", "content": "counter.test.ts:\nExpected: 2\nReceived: 3\nStack: increment(1) returned value + 2\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "The log confirms received value is one too high." }, { "type": "text", "content": "The failure log confirms the same issue numerically: expected `2`, received `3`. That maps directly to adding two instead of one." }]], "usage": { "inputTokens": 230, "outputTokens": 38, "totalTokens": 268 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Interpret this failure log and connect it to the code." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat logs/test-output.txt" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "src/counter.ts", "content": "export function increment(value: number) {\n  return value + 1\n}\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "The implementation now returns `value + 1`, so the original test should pass without changing the assertion." }]], "usage": { "inputTokens": 170, "outputTokens": 29, "totalTokens": 199 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Confirm the implementation fix after applying it." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!cat src/counter.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "test/counter.edge.test.ts", "content": "import { expect, test } from 'bun:test'\nimport { increment } from '../src/counter'\n\ntest.each([[0, 1], [-1, 0], [41, 42]])('increments %p to %p', (input, output) => {\n  expect(increment(input)).toBe(output)\n})\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "The edge test covers zero, negative, and positive values." }, { "type": "text", "content": "The extra table-driven test covers zero, a negative value, and a positive sentinel. It catches accidental `+2` regressions without broadening scope." }]], "usage": { "inputTokens": 210, "outputTokens": 40, "totalTokens": 250 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Review the extra edge test and explain its coverage." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat test/counter.edge.test.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/show thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Produce final handoff referencing implementation, tests, and log." }, { "type": "text", "content": "Handoff: `src/counter.ts` now increments by one; `test/counter.test.ts` covers the original failure; `test/counter.edge.test.ts` adds table-driven coverage; `logs/test-output.txt` records the initial failure." }]], "usage": { "inputTokens": 230, "outputTokens": 52, "totalTokens": 282 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Give a handoff summary referencing every artifact." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!ls src test logs" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 800 }
+  ]
+}
@@ -0,0 +1,59 @@
+{
+  "actions": [
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "package.json", "content": "{\n  \"scripts\": {\n    \"test\": \"bun test\",\n    \"typecheck\": \"bun typecheck\"\n  }\n}\n" },
+    { "type": "writeFile", "path": "src/math.ts", "content": "export const add = (a: number, b: number) => a + b\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Render a tool-call-like plan as structured text." }, { "type": "text", "content": "Validation plan:\n\n```json\n{\"tool\":\"read\",\"input\":{\"filePath\":\"package.json\"}}\n```\n\nThen run `bun test` and `bun typecheck`." }]], "usage": { "inputTokens": 300, "outputTokens": 62, "totalTokens": 362 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Show me the validation plan and include the tool call you would make." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1200 },
+    { "type": "typeText", "text": "!cat package.json" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "The package scripts support the plan: `test` maps to `bun test` and `typecheck` maps to `bun typecheck`." }]], "usage": { "inputTokens": 150, "outputTokens": 29, "totalTokens": 179 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Does package.json support your validation plan?" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "/hide thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "test/math.test.ts", "content": "import { expect, test } from 'bun:test'\nimport { add } from '../src/math'\n\ntest('adds two numbers', () => {\n  expect(add(1, 2)).toBe(3)\n})\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Structured artifact:\n\n```json\n{\"tool\":\"write\",\"path\":\"test/math.test.ts\",\"purpose\":\"verify add(1,2)\"}\n```" }]], "usage": { "inputTokens": 200, "outputTokens": 45, "totalTokens": 245 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Render the generated test as a structured tool artifact." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat test/math.test.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "reports/tool-transcript.json", "content": "{\n  \"steps\": [\n    {\"tool\":\"read\",\"target\":\"package.json\",\"status\":\"ok\"},\n    {\"tool\":\"read\",\"target\":\"src/math.ts\",\"status\":\"ok\"},\n    {\"tool\":\"write\",\"target\":\"test/math.test.ts\",\"status\":\"ok\"}\n  ],\n  \"next\": \"bun test test/math.test.ts\"\n}\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Explain transcript and next command." }, { "type": "text", "content": "The transcript records read/read/write tool-style steps, ending with a focused test file. The next command is `bun test test/math.test.ts`." }]], "usage": { "inputTokens": 190, "outputTokens": 38, "totalTokens": 228 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Explain the tool transcript and next command." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!cat reports/tool-transcript.json" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "reports/coverage-notes.md", "content": "# Coverage Notes\n\n- add(1, 2) covers the happy path.\n- add(-1, 1) should be added if signed values matter.\n- add(0, 0) should be added if identity behavior matters.\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Coverage notes separate required and optional work: the happy path test is required; signed and identity cases are useful follow-ups." }]], "usage": { "inputTokens": 170, "outputTokens": 32, "totalTokens": 202 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Review the coverage notes and separate must-have from optional." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat reports/coverage-notes.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/show thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Close with generated artifacts and command." }, { "type": "text", "content": "Final checklist: `test/math.test.ts` validates `add`; `reports/tool-transcript.json` documents the flow; `reports/coverage-notes.md` lists optional cases; run `bun test test/math.test.ts`." }]], "usage": { "inputTokens": 230, "outputTokens": 50, "totalTokens": 280 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Close with a checklist of generated artifacts and command." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" }
+  ]
+}
@@ -0,0 +1,57 @@
+{
+  "actions": [
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "notes/toggles.md", "content": "# Toggle Notes\n\nExercise prompt command toggles and verify responses remain readable.\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Start with thinking visible and explain test purpose." }, { "type": "text", "content": "This walkthrough checks thinking visibility toggles, shell output, prompt follow-ups, and sidebar open/close behavior through `ctrl+x b`." }]], "usage": { "inputTokens": 210, "outputTokens": 40, "totalTokens": 250 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "We are testing prompt command toggles. Explain the scenario." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "typeText", "text": "/hide thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Thinking should now be hidden. The visible answer should still be useful and concise." }]], "usage": { "inputTokens": 120, "outputTokens": 23, "totalTokens": 143 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Confirm thinking is hidden and continue." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "typeText", "text": "!cat notes/toggles.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "notes/command-palette.md", "content": "# Command Palette Exercise\n\nActions to simulate:\n- hide thinking\n- show thinking\n- read shell output\n- summarize transcript\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Script uses slash command text as command-palette stand-in." }, { "type": "text", "content": "The command-palette exercise documents hide/show thinking, shell reads, and transcript summaries. The script also toggles the sidebar with `ctrl+x b`." }]], "usage": { "inputTokens": 220, "outputTokens": 40, "totalTokens": 260 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Explain the command-palette exercise file." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat notes/command-palette.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "notes/transcript.json", "content": "{\n  \"mode\": \"command-toggle\",\n  \"events\": [\"hide-thinking\", \"shell-cat\", \"show-thinking\", \"sidebar-toggle\"],\n  \"expected\": {\"cwd\": \"/opencode\", \"status\": \"readable\"}\n}\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "The transcript JSON records expected events and asserts `/opencode` as the shell working directory. It is useful for deterministic replay checks." }]], "usage": { "inputTokens": 150, "outputTokens": 33, "totalTokens": 183 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Interpret the transcript JSON." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!cat notes/transcript.json" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/show thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Show-thinking checkpoint should include reasoning." }, { "type": "text", "content": "Show-thinking checkpoint: reasoning is visible again, and the transcript includes toggles, shell reads, and sidebar interaction." }]], "usage": { "inputTokens": 150, "outputTokens": 35, "totalTokens": 185 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Give a show-thinking checkpoint summary." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "typeText", "text": "!ls notes" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Final summary references all notes." }, { "type": "text", "content": "Final walkthrough summary: `notes/toggles.md`, `notes/command-palette.md`, and `notes/transcript.json` document the command-toggle flow and expected shell context." }]], "usage": { "inputTokens": 190, "outputTokens": 39, "totalTokens": 229 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Summarize the notes directory artifacts." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 }
+  ]
+}
@@ -0,0 +1,57 @@
+{
+  "actions": [
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "src/profile.ts", "content": "export interface Profile {\n  id: string\n  name: string\n}\n\nexport function label(profile: Profile) {\n  return profile.id\n}\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "label should use name, not id." }, { "type": "text", "content": "The smallest change is in `src/profile.ts`: update `label(profile)` to return `profile.name` instead of `profile.id`." }]], "usage": { "inputTokens": 250, "outputTokens": 36, "totalTokens": 286 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "I want profile labels to show names, not ids. What should change?" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "typeText", "text": "!cat src/profile.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/hide thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Patch shape:\n```diff\n-  return profile.id\n+  return profile.name\n```" }]], "usage": { "inputTokens": 115, "outputTokens": 27, "totalTokens": 142 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Render the edit as a tiny diff only." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "writeFile", "path": "src/profile.ts", "content": "export interface Profile {\n  id: string\n  name: string\n}\n\nexport function label(profile: Profile) {\n  return profile.name\n}\n" },
+    { "type": "typeText", "text": "!cat src/profile.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "test/profile.test.ts", "content": "import { expect, test } from 'bun:test'\nimport { label, type Profile } from '../src/profile'\n\ntest('labels a profile with its name', () => {\n  const profile: Profile = { id: 'u_123', name: 'Ada Lovelace' }\n  expect(label(profile)).toBe('Ada Lovelace')\n})\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Different id/name values catch the bug." }, { "type": "text", "content": "The profile test is targeted: the id and name differ, so returning `profile.id` fails while returning `profile.name` passes." }]], "usage": { "inputTokens": 210, "outputTokens": 37, "totalTokens": 247 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Review the new profile test and explain why it catches the bug." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat test/profile.test.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "docs/profile-change.json", "content": "{\n  \"change\": \"profile-label-name\",\n  \"files\": [\"src/profile.ts\", \"test/profile.test.ts\"],\n  \"behavior\": {\"before\": \"id\", \"after\": \"name\"},\n  \"risk\": \"low\"\n}\n" },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "The change metadata records a low-risk behavior shift from id labels to name labels, touching implementation and test files." }]], "usage": { "inputTokens": 170, "outputTokens": 31, "totalTokens": 201 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Interpret the profile change metadata." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "typeText", "text": "!cat docs/profile-change.json" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/show thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Final summary should mention implementation, test, and metadata." }, { "type": "text", "content": "Final handoff: `src/profile.ts` returns `profile.name`, `test/profile.test.ts` verifies id/name distinction, and `docs/profile-change.json` records the low-risk behavior change." }]], "usage": { "inputTokens": 210, "outputTokens": 44, "totalTokens": 254 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Give a complete final handoff summary." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!ls src test docs" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 800 }
+  ]
+}
@@ -0,0 +1,65 @@
+{
+  "actions": [
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "docs/design.md", "content": "# Queue Design\n\nJobs are pushed to an in-memory array and workers poll every second.\n" },
+    { "type": "writeFile", "path": "src/queue.ts", "content": "const jobs: string[] = []\n\nexport function push(job: string) { jobs.push(job) }\nexport function next() { return jobs.shift() }\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Identify reliability and latency risks." }, { "type": "text", "content": "Design review: the in-memory queue is simple and fast, but loses jobs on restart. Polling every second adds latency. Production needs durability, leases, and metrics." }]], "usage": { "inputTokens": 340, "outputTokens": 50, "totalTokens": 390 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Review the queue design and call out production risks." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1200 },
+    { "type": "typeText", "text": "!cat docs/design.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "!cat src/queue.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/hide thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "Production checklist: durable storage, retry metadata, visibility timeout or leases, queue-depth metrics, and lower-latency notifications if needed." }]], "usage": { "inputTokens": 160, "outputTokens": 33, "totalTokens": 193 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Turn that review into a production checklist." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "writeFile", "path": "docs/risk-matrix.json", "content": "{\n  \"risks\": [\n    {\"name\":\"job-loss\",\"severity\":\"high\",\"mitigation\":\"durable storage\"},\n    {\"name\":\"polling-latency\",\"severity\":\"medium\",\"mitigation\":\"notifications\"},\n    {\"name\":\"duplicate-work\",\"severity\":\"medium\",\"mitigation\":\"leases\"}\n  ]\n}\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Rank risks from matrix." }, { "type": "text", "content": "Highest risk: job loss because the queue is in-memory. Medium risks: polling latency and duplicate work, mitigated by notifications and leases." }]], "usage": { "inputTokens": 240, "outputTokens": 40, "totalTokens": 280 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Analyze the risk matrix and rank the risks." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1000 },
+    { "type": "typeText", "text": "!cat docs/risk-matrix.json" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "test/queue.test.ts", "content": "import { expect, test } from 'bun:test'\nimport { next, push } from '../src/queue'\n\ntest('returns jobs in fifo order', () => {\n  push('first')\n  push('second')\n  expect(next()).toBe('first')\n  expect(next()).toBe('second')\n})\n" },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "text", "content": "The FIFO test guards current behavior. It does not cover durability, retries, leases, or cross-process behavior." }]], "usage": { "inputTokens": 180, "outputTokens": 28, "totalTokens": 208 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Review the FIFO test and explain what it does not cover." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "typeText", "text": "!cat test/queue.test.ts" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "writeFile", "path": "docs/decision-record.md", "content": "# ADR: Keep In-Memory Queue for Demo\n\nStatus: Accepted for demo only\n\nContext: The current queue is in-memory and non-durable.\n\nDecision: Keep it for demos, add FIFO tests, and document production risks.\n\nConsequences: Restart loses jobs; production needs durable storage.\n" },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Summarize ADR context, decision, and consequences." }, { "type": "text", "content": "The ADR accepts the in-memory queue for demo use only, adds FIFO tests, and documents that production requires durable storage." }]], "usage": { "inputTokens": 220, "outputTokens": 33, "totalTokens": 253 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Summarize the ADR and its consequences." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 900 },
+    { "type": "pressKey", "key": "x", "modifiers": { "ctrl": true } },
+    { "type": "pressKey", "key": "b" },
+    { "type": "typeText", "text": "!cat docs/decision-record.md" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 700 },
+    { "type": "typeText", "text": "/show thinking" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 500 },
+    { "type": "enqueueLLM", "scripts": [{ "steps": [[{ "type": "thinking", "content": "Final handoff references design doc, risk matrix, FIFO test, and ADR." }, { "type": "text", "content": "Final handoff: `docs/design.md` describes the queue, `docs/risk-matrix.json` ranks risks, `test/queue.test.ts` covers FIFO behavior, and `docs/decision-record.md` limits the design to demo use." }]], "usage": { "inputTokens": 250, "outputTokens": 52, "totalTokens": 302 }, "finish": "stop" }] },
+    { "type": "typeText", "text": "Give a final handoff summary of all artifacts." },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 1100 },
+    { "type": "typeText", "text": "!ls docs test src" },
+    { "type": "pressEnter" },
+    { "type": "wait", "ms": 800 }
+  ]
+}