feat(evidence): wire evidence events into extraction, promotion, reinforcement, render, storage, and hook lifecycle

Phase 3 Tasks 3.2-3.6:
- Extraction evidence: accepted/rejected/explicit_detected/explicit_ignored
- Promotion evidence with relation edges (superseded/superseded_by, absorbed/retained)
- Reinforcement evidence with reinforced/reinforced_by relations
- Render accounting helper with render_selected/render_omitted evidence
- Storage evidence: corrupt_json_quarantined, stale_lock_recovered, lock_timeout
- Hook failure evidence in plugin
- All evidence failures are swallowed and never thrown into memory behavior
- Privacy-safe textPreview (redacted + truncated)
- 266 tests pass, typecheck passes
This commit is contained in:
Ralph Chang
2026-04-30 17:54:13 +08:00
parent 27e9d7ce92
commit 617b3646d8
11 changed files with 880 additions and 50 deletions
+56 -1
View File
@@ -3,7 +3,13 @@ import assert from "node:assert/strict";
import { mkdtemp, readFile, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { extractErrorsFromBash, extractExplicitMemories, parseWorkspaceMemoryCandidates } from "../src/extractors.ts";
import {
extractErrorsFromBash,
extractExplicitMemories,
extractExplicitMemoriesWithEvidence,
parseWorkspaceMemoryCandidates,
parseWorkspaceMemoryCandidatesWithEvidence,
} from "../src/extractors.ts";
async function waitForFile(path: string, attempts = 20): Promise<string> {
let lastError: unknown;
@@ -145,6 +151,20 @@ test("extractExplicitMemories captures multiple memories in same message", () =>
assert.equal(items.length, 2);
});
// Feeds a three-line message to the evidence-returning extractor and checks that
// exactly one entry is produced, that detected/ignored evidence is reported with
// the "negated_request" and "deferral" reason codes, and that the word "sushi"
// never appears in the serialized evidence.
test("explicit memory extraction returns detected and ignored evidence", () => {
  const message = [
    "remember this: Prefer deterministic tests.",
    "don't remember this: temporary password: sushi",
    "remember this: later",
  ].join("\n");

  const { entries, evidence } = extractExplicitMemoriesWithEvidence(message);
  const ignored = evidence.filter(item => item.type === "explicit_memory_ignored");

  assert.equal(entries.length, 1);
  assert.ok(evidence.some(item => item.type === "explicit_memory_detected"));
  assert.ok(ignored.some(item => item.reasonCodes.includes("negated_request")));
  assert.ok(ignored.some(item => item.reasonCodes.includes("deferral")));

  // Privacy check: the secret from the negated line must not leak into evidence.
  const serialized = JSON.stringify(evidence);
  assert.equal(serialized.includes("sushi"), false);
});
// ============================================
// Task 7: Compaction quality gate tests
// ============================================
@@ -176,6 +196,41 @@ test("parseWorkspaceMemoryCandidates rejects raw error", () => {
assert.equal(items.length, 0);
});
// A single well-formed "[decision]" candidate is expected to yield one entry and
// one "extraction_candidate_accepted" evidence event carrying both acceptance
// reason codes and a preview that still contains the candidate's text.
test("compaction accepted candidate returns privacy-safe extraction evidence", () => {
  // The template literal is runtime input to the parser; its lines stay unindented.
  const summary = `
Memory candidates:
- [decision] Use accounting evidence events to explain promoted memories in diagnostics.
`;
  const { entries, evidence } = parseWorkspaceMemoryCandidatesWithEvidence(summary);

  assert.equal(entries.length, 1);
  assert.equal(evidence.length, 1);

  const [accepted] = evidence;
  assert.equal(accepted.type, "extraction_candidate_accepted");
  for (const code of ["quality_gate_passed", "valid_candidate_format"] as const) {
    assert.ok(accepted.reasonCodes.includes(code));
  }
  assert.match(accepted.textPreview ?? "", /accounting evidence events/);
});
// A candidate mixing a password, a PIN, a bearer token and a raw error is expected
// to be rejected: no entries, one "extraction_candidate_rejected" event with at
// least one reason code, none of the secret substrings in the serialized evidence,
// and a preview of at most 80 characters.
test("compaction rejected candidate returns rejection evidence without secrets", () => {
  // The template literal is runtime input to the parser; its lines stay unindented.
  const summary = `
Memory candidates:
- [feedback] password: sushi Admin PIN 是 456123 Bearer abc.def.ghi TypeError: Cannot read property x
`;
  const { entries, evidence } = parseWorkspaceMemoryCandidatesWithEvidence(summary);
  const serialized = JSON.stringify(evidence);

  assert.equal(entries.length, 0);
  assert.equal(evidence.length, 1);

  const [rejected] = evidence;
  assert.equal(rejected.type, "extraction_candidate_rejected");
  assert.ok(rejected.reasonCodes.length > 0);

  for (const secret of ["sushi", "456123", "abc.def.ghi"]) {
    assert.equal(serialized.includes(secret), false);
  }

  const previewLength = rejected.textPreview?.length ?? 0;
  assert.ok(previewLength <= 80);
});
test("parseWorkspaceMemoryCandidates rejects stack trace", () => {
const summary = `
## Memory Candidates
+69 -1
View File
@@ -8,9 +8,10 @@ import { loadSessionState, saveSessionState } from "../src/session-state.ts";
import { parseWorkspaceMemoryCandidates } from "../src/extractors.ts";
import type { OpenError } from "../src/types.ts";
import { PROMOTION_RETRY_LIMITS, WORKSPACE_MEMORY_CACHE_LIMITS } from "../src/types.ts";
import { workspaceMemoryPath, workspacePendingJournalPath } from "../src/paths.ts";
import { sessionStatePath, workspaceMemoryPath, workspacePendingJournalPath } from "../src/paths.ts";
import { loadPendingJournal, savePendingJournal, memoryKey } from "../src/pending-journal.ts";
import { loadWorkspaceMemory, updateWorkspaceMemory } from "../src/workspace-memory.ts";
import { queryEvidenceEvents } from "../src/evidence-log.ts";
// Mock client for root session (not a sub-agent)
function mockRootClient() {
@@ -477,6 +478,35 @@ test("chat system transform degrades gracefully when workspace memory JSON is co
}
});
// Runs the "tool.execute.after" hook for a failing bash call and checks that exactly
// one "hook_failed" evidence event is recorded, tagged with the hook name, and that
// neither the raw tool output nor the secret inside it appears in the evidence.
test("hook failure emits hook_failed evidence without raw tool output", async () => {
  const sessionID = "hook-failure-session";
  const tmpDir = await mkdtemp(join(tmpdir(), "memory-plugin-test-"));
  try {
    // A directory is created at the session-state path itself.
    // NOTE(review): presumably this makes the hook's state read/write fail — confirm.
    const statePath = await sessionStatePath(tmpDir, sessionID);
    await mkdir(dirname(statePath), { recursive: true });
    await mkdir(statePath, { recursive: true });

    const hooks = (await MemoryV2Plugin({ directory: tmpDir, client: mockRootClient() })) as Record<string, Function>;
    const toolInput = {
      tool: "bash",
      sessionID,
      args: { command: "cat secret-output.txt" },
    };
    const toolResult = { output: "raw tool output password: sushi should not be in evidence", exitCode: 1 };
    await hooks["tool.execute.after"](toolInput, toolResult);

    const hookFailures = await queryEvidenceEvents(tmpDir, { types: ["hook_failed"] });
    const serialized = JSON.stringify(hookFailures);

    assert.equal(hookFailures.length, 1);
    assert.equal(hookFailures[0].reasonCodes.includes("tool.execute.after"), true);
    for (const forbidden of ["raw tool output", "sushi"]) {
      assert.equal(serialized.includes(forbidden), false);
    }
  } finally {
    await rm(tmpDir, { recursive: true, force: true });
  }
});
test("no compaction: owned explicit memory is not promoted by unrelated next session start", async () => {
const tmpDir = await mkdtemp(join(tmpdir(), "memory-plugin-test-"));
@@ -903,6 +933,44 @@ test("integration: explicit memory flows from user message through pending journ
}
});
// Drives one explicit "remember this" message through a system transform and a
// "session.compacted" event, then checks that detection, pending-append and
// promotion evidence were all recorded. A second system transform must then add
// "render_selected" evidence.
test("explicit memory lifecycle emits detected appended and promoted evidence", async () => {
  const sessionID = "evidence-life-session";
  const tmpDir = await mkdtemp(join(tmpdir(), "memory-plugin-test-"));
  try {
    const hooks = (await MemoryV2Plugin({
      directory: tmpDir,
      client: mockClientWithLatestUser("remember this: Prefer evidence-backed lifecycle tests.", "msg-evidence-life"),
    })) as Record<string, Function>;

    await hooks["experimental.chat.system.transform"](
      { sessionID, model: {} },
      { system: ["base header"] },
    );
    await hooks["event"]({
      event: { type: "session.compacted", properties: { sessionID } },
    });

    const afterCompaction = await queryEvidenceEvents(tmpDir, { newestFirst: false });
    const seenTypes = new Set(afterCompaction.map(item => item.type));
    for (const expected of ["explicit_memory_detected", "pending_memory_appended", "promotion_promoted"] as const) {
      assert.ok(seenTypes.has(expected));
    }

    // Second transform on the same session, after compaction.
    const output = { system: ["base header"] };
    await hooks["experimental.chat.system.transform"]({ sessionID, model: {} }, output);

    const afterRender = await queryEvidenceEvents(tmpDir, { newestFirst: false });
    assert.ok(afterRender.some(item => item.type === "render_selected"));
  } finally {
    await rm(tmpDir, { recursive: true, force: true });
  }
});
test("session.compacted promotes first-time explicit memory without self-reinforcement", async () => {
const tmpDir = await mkdtemp(join(tmpdir(), "memory-plugin-test-"));
+55 -1
View File
@@ -1,7 +1,7 @@
import test from "node:test";
import assert from "node:assert/strict";
import type { LongTermMemoryEntry } from "../src/types.ts";
import { accountPendingPromotions } from "../src/promotion-accounting.ts";
import { accountPendingPromotions, promotionAccountingEvidenceEvents } from "../src/promotion-accounting.ts";
import { memoryKey } from "../src/pending-journal.ts";
import type { MemoryConsolidationEvent } from "../src/workspace-memory.ts";
import { workspaceMemoryExactKey, workspaceMemoryIdentityKey } from "../src/workspace-memory.ts";
@@ -229,3 +229,57 @@ test("accountPendingPromotions marks manual capacity rejection as retryable", ()
assert.equal(result.clearableKeys.size, 0);
assert.deepEqual([...result.retryableRejectedKeys], [memoryKey(pending[0])]);
});
// Builds one pending memory per promotion outcome (promoted, absorbed exact,
// absorbed identity, superseded, capacity-rejected, retry-exhausted), passes a
// hand-written accounting result to promotionAccountingEvidenceEvents, and checks
// that the emitted event types match the expected set exactly and that the
// absorbed/superseded events carry both sides of their relation.
test("promotionAccountingEvidenceEvents maps every promotion outcome with relations", () => {
  // Fixtures: pending candidates and the entries they end up related to.
  const promoted = mem("promoted", "Promoted memory should produce evidence.", { source: "explicit" });
  const absorbed = mem("absorbed", "Absorbed memory should produce evidence.", { source: "explicit" });
  const retained = mem("retained", "absorbed memory should produce evidence.", { source: "explicit" });
  const identityAbsorbed = mem("identity-absorbed", "Project config lives in `src/config.ts`", { type: "reference" });
  const identityRetained = mem("identity-retained", "Project config lives in `./src/config.ts`", { type: "reference" });
  const superseded = mem("superseded", "Parser supports 3 formats.", { source: "compaction" });
  const replacement = mem("replacement", "Parser supports 4 formats.", { source: "compaction" });
  const capacity = mem("capacity", "Capacity rejected explicit memory should retry.", { source: "explicit", type: "reference" });
  const exhausted = mem("exhausted", "Exhausted explicit memory should stop retrying.", { source: "explicit", type: "reference" });

  const pendingEntries = [promoted, absorbed, identityAbsorbed, superseded, capacity, exhausted];

  // Hand-built accounting result: which pending keys fell into which bucket.
  const outcome = {
    promotedKeys: new Set([memoryKey(promoted)]),
    absorbedKeys: new Set([memoryKey(absorbed), memoryKey(identityAbsorbed)]),
    supersededKeys: new Set([memoryKey(superseded)]),
    rejectedKeys: new Set([memoryKey(capacity), memoryKey(exhausted)]),
    retryableRejectedKeys: new Set([memoryKey(capacity), memoryKey(exhausted)]),
    clearableKeys: new Set([
      memoryKey(promoted),
      memoryKey(absorbed),
      memoryKey(identityAbsorbed),
      memoryKey(superseded),
      memoryKey(exhausted),
    ]),
  };

  // Consolidation events that link each non-promoted candidate to its counterpart.
  const consolidationEvents = [
    { ...event(absorbed, "absorbed_exact"), retainedId: retained.id },
    { ...event(identityAbsorbed, "absorbed_identity"), retainedId: identityRetained.id },
    { ...event(superseded, "superseded_existing"), retainedId: replacement.id, supersededId: superseded.id },
    event(capacity, "rejected_capacity"),
    event(exhausted, "rejected_capacity"),
  ];

  const emitted = promotionAccountingEvidenceEvents({
    pending: pendingEntries,
    after: [promoted, retained, identityRetained, replacement],
    events: consolidationEvents,
    accounting: outcome,
    exhaustedRejectedKeys: new Set([memoryKey(exhausted)]),
  });

  const expectedPromotionEventTypes = new Set([
    "promotion_promoted",
    "promotion_absorbed_exact",
    "promotion_absorbed_identity",
    "promotion_superseded",
    "promotion_rejected_capacity",
    "promotion_retry_scheduled",
    "promotion_retry_exhausted",
  ]);
  const emittedTypes = new Set(emitted.map(item => item.type));
  assert.deepEqual(emittedTypes, expectedPromotionEventTypes);

  // True when the given evidence event links `memoryId` under `role`.
  const hasRelation = (source: (typeof emitted)[number] | undefined, role: string, memoryId: string): boolean =>
    source?.relations?.some(link => link.role === role && link.memory?.memoryId === memoryId) ?? false;

  const absorbedEvidence = emitted.find(item => item.type === "promotion_absorbed_exact");
  assert.ok(hasRelation(absorbedEvidence, "absorbed", absorbed.id));
  assert.ok(hasRelation(absorbedEvidence, "retained", retained.id));

  const supersededEvidence = emitted.find(item => item.type === "promotion_superseded");
  assert.ok(hasRelation(supersededEvidence, "superseded", superseded.id));
  assert.ok(hasRelation(supersededEvidence, "superseded_by", replacement.id));
});
+64 -2
View File
@@ -1,11 +1,13 @@
import test from "node:test";
import assert from "node:assert/strict";
import { existsSync } from "node:fs";
import { mkdtemp, readdir, rm, writeFile } from "node:fs/promises";
import { join } from "node:path";
import { mkdir, mkdtemp, readdir, rm, writeFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import { tmpdir } from "node:os";
import { spawn } from "node:child_process";
import { readJSON, updateJSON } from "../src/storage.ts";
import { queryEvidenceEvents } from "../src/evidence-log.ts";
import { workspaceMemoryPath } from "../src/paths.ts";
test("updateJSON serializes concurrent increments", async () => {
const root = await mkdtemp(join(tmpdir(), "wm-storage-"));
@@ -56,6 +58,25 @@ test("readJSON quarantines corrupt JSON and returns fallback", async () => {
}
});
// Writes malformed JSON at the workspace-memory path, reads it through readJSON,
// and checks that the fallback value is returned and that exactly one
// "storage_corrupt_json_quarantined" event with reason "invalid_json" is logged
// without echoing the corrupt file content.
test("readJSON emits corrupt JSON quarantine evidence for workspace stores", async () => {
  const root = await mkdtemp(join(tmpdir(), "wm-storage-evidence-corrupt-"));
  try {
    const storePath = await workspaceMemoryPath(root);
    await mkdir(dirname(storePath), { recursive: true });
    await writeFile(storePath, "{ invalid json", "utf8");

    const fallback = await readJSON(storePath, () => ({ ok: true }));
    const quarantineEvents = await queryEvidenceEvents(root, { types: ["storage_corrupt_json_quarantined"] });

    assert.deepEqual(fallback, { ok: true });
    assert.equal(quarantineEvents.length, 1);

    const [quarantined] = quarantineEvents;
    assert.equal(quarantined.reasonCodes.includes("invalid_json"), true);

    // The corrupt payload itself must not be copied into the evidence log.
    const serialized = JSON.stringify(quarantineEvents);
    assert.equal(serialized.includes("invalid json"), false);
  } finally {
    await rm(root, { recursive: true, force: true });
  }
});
test("updateJSON recovers stale lock files left by crashed process", async () => {
const root = await mkdtemp(join(tmpdir(), "wm-storage-stale-lock-"));
try {
@@ -71,6 +92,47 @@ test("updateJSON recovers stale lock files left by crashed process", async () =>
}
});
// Plants a lock file whose content is "999999\n0\n" next to the workspace-memory
// path, runs updateJSON, and checks that exactly one "storage_stale_lock_recovered"
// event with reason "stale_lock" is logged without the lock's first line appearing
// in the evidence.
test("updateJSON emits stale lock recovery evidence for workspace stores", async () => {
  const root = await mkdtemp(join(tmpdir(), "wm-storage-evidence-stale-lock-"));
  try {
    const storePath = await workspaceMemoryPath(root);
    await mkdir(dirname(storePath), { recursive: true });
    await writeFile(storePath + ".lock", "999999\n0\n", "utf8");

    await updateJSON(
      storePath,
      () => ({ count: 0 }),
      previous => ({ count: previous.count + 1 }),
    );

    const recoveries = await queryEvidenceEvents(root, { types: ["storage_stale_lock_recovered"] });
    assert.equal(recoveries.length, 1);

    const [recovered] = recoveries;
    assert.equal(recovered.reasonCodes.includes("stale_lock"), true);

    const serialized = JSON.stringify(recoveries);
    assert.equal(serialized.includes("999999"), false);
  } finally {
    await rm(root, { recursive: true, force: true });
  }
});
// Plants a lock file carrying this process's PID and the current time, expects
// updateJSON to reject with a lock-wait timeout, and checks that exactly one
// "storage_lock_timeout" event with reason "lock_wait_timeout" is logged and that
// the lock holder's PID is not recorded as a value anywhere in that evidence.
test("updateJSON emits lock timeout evidence and still throws", async () => {
  const root = await mkdtemp(join(tmpdir(), "wm-storage-evidence-timeout-"));
  try {
    const path = await workspaceMemoryPath(root);
    const lockPath = `${path}.lock`;
    const lockPid = process.pid;
    await mkdir(dirname(path), { recursive: true });
    await writeFile(lockPath, `${lockPid}\n${Date.now()}\n`, "utf8");

    await assert.rejects(
      updateJSON(path, () => ({ count: 0 }), current => current),
      /Timed out waiting for lock/,
    );

    const events = await queryEvidenceEvents(root, { types: ["storage_lock_timeout"] });
    assert.equal(events.length, 1);
    assert.equal(events[0].reasonCodes.includes("lock_wait_timeout"), true);

    // A raw substring search for the PID is unreliable: a PID is a short digit
    // string (often one or two digits inside containers), so it can appear by
    // coincidence inside any timestamp, id or counter in the serialized events.
    // Walk the event values instead and flag only a value that IS the PID.
    // Trade-off: a PID embedded in a longer free-text string is not detected.
    const carriesPid = (value: unknown): boolean => {
      if (typeof value === "number") return value === lockPid;
      if (typeof value === "string") return value.trim() === String(lockPid);
      if (Array.isArray(value)) return value.some(carriesPid);
      if (value !== null && typeof value === "object") return Object.values(value).some(carriesPid);
      return false;
    };
    assert.equal(carriesPid(events), false);
  } finally {
    await rm(root, { recursive: true, force: true });
  }
});
test("updateJSON serializes writes across separate node processes", async () => {
const root = await mkdtemp(join(tmpdir(), "wm-storage-xproc-"));
try {
+84
View File
@@ -8,6 +8,7 @@ import { HOT_STATE_LIMITS, LONG_TERM_LIMITS } from "../src/types.ts";
import { workspaceKey, workspaceMemoryPath } from "../src/paths.ts";
import {
renderWorkspaceMemory,
accountWorkspaceMemoryRender,
enforceLongTermLimits,
dedupeLongTermEntriesWithAccounting,
enforceLongTermLimitsWithAccounting,
@@ -157,6 +158,47 @@ test("renderWorkspaceMemory returns empty for no entries", () => {
assert.equal(rendered, "");
});
// Renders a store with 12 feedback entries plus one superseded decision and checks
// that 10 entries are reported as rendered, that omissions are attributed to both
// "type_cap" and "superseded", and that matching render evidence is produced.
test("accountWorkspaceMemoryRender reports rendered and omitted reasons", () => {
  const feedbackEntries = Array.from({ length: 12 }, (_, i) =>
    entry(`feedback-render-${i}`, `Unique rendered feedback preference ${i}`, "feedback"),
  );
  const supersededEntry = {
    ...entry("superseded-render", "Old superseded memory", "decision"),
    status: "superseded" as const,
  };
  const store: WorkspaceMemoryStore = {
    version: 1,
    workspace: { root: "/repo", key: "abc" },
    limits: { maxRenderedChars: LONG_TERM_LIMITS.maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries },
    entries: [...feedbackEntries, supersededEntry],
    updatedAt: new Date().toISOString(),
  };

  const { rendered, omitted, evidence } = accountWorkspaceMemoryRender(store);
  const omissionReasons = omitted.map(item => item.reason);

  assert.equal(rendered.length, 10);
  assert.ok(omissionReasons.includes("type_cap"));
  assert.ok(omissionReasons.includes("superseded"));
  assert.ok(evidence.some(item => item.type === "render_selected"));

  const omittedEvidence = evidence.filter(item => item.type === "render_omitted");
  assert.ok(omittedEvidence.some(item => item.reasonCodes.includes("type_cap")));
});
// Renders the same three long decision entries under a 180-character budget and a
// 10-character budget, expecting "char_budget" omissions for the former and
// "empty_render_budget" omissions for the latter.
test("accountWorkspaceMemoryRender reports char budget and empty budget omissions", () => {
  const longEntries = Array.from({ length: 3 }, (_, i) =>
    entry(`char-budget-${i}`, `Long rendered memory ${i} `.repeat(20), "decision"),
  );
  const updatedAt = new Date().toISOString();

  // Both stores share entries and timestamp; only the character budget differs.
  const storeWithBudget = (maxRenderedChars: number): WorkspaceMemoryStore => ({
    version: 1,
    workspace: { root: "/repo", key: "abc" },
    limits: { maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries },
    entries: longEntries,
    updatedAt,
  });

  const charBudget = accountWorkspaceMemoryRender(storeWithBudget(180));
  const emptyBudget = accountWorkspaceMemoryRender(storeWithBudget(10));

  const charBudgetReasons = charBudget.omitted.map(item => item.reason);
  const emptyBudgetReasons = emptyBudget.omitted.map(item => item.reason);
  assert.ok(charBudgetReasons.includes("char_budget"));
  assert.ok(emptyBudgetReasons.includes("empty_render_budget"));
});
// ============================================
// PR-2 Task 5 tests (for enforceLongTermLimits)
// ============================================
@@ -485,6 +527,30 @@ test("dedupeLongTermEntriesWithAccounting reinforces absorbed exact duplicates",
assert.equal(result.kept[0].reinforcementCount, 1);
assert.equal(result.kept[0].lastReinforcedSessionID, "reinforce-session");
assert.ok(typeof result.kept[0].retentionClock === "number");
assert.ok(result.evidence.some(event =>
event.type === "memory_reinforced" &&
event.reasonCodes.includes("duplicate_exact") &&
event.relations?.some(relation => relation.role === "reinforced" && relation.memory?.memoryId === "duplicate") &&
event.relations?.some(relation => relation.role === "reinforced_by" && relation.memory?.memoryId === "retained")
));
});
// Dedupes two reference entries that describe the same config path with different
// wording and checks that a "memory_reinforced" event tagged "duplicate_identity"
// is emitted.
test("dedupeLongTermEntriesWithAccounting emits identity reinforcement evidence", () => {
  const referenceTime = Date.now();

  // Existing entry whose retention clock is set ten days in the past.
  const retained: LongTermMemoryEntry = {
    ...entry("retained-identity", "OpenCode plugin config location: `.opencode-agenthub/current/xdg/opencode/opencode.json` in workspace", "reference"),
    retentionClock: referenceTime - 10 * DAY_MS,
  };
  // Incoming entry naming the same path, owned by a pending session.
  const duplicate: LongTermMemoryEntry = {
    ...entry("duplicate-identity", "OpenCode plugin config: .opencode-agenthub/current/xdg/opencode/opencode.json in workspace", "reference"),
    pendingOwnerSessionID: "identity-session",
  };

  const { evidence } = dedupeLongTermEntriesWithAccounting([retained, duplicate]);
  const reinforcements = evidence.filter(item => item.type === "memory_reinforced");

  assert.ok(reinforcements.some(item => item.reasonCodes.includes("duplicate_identity")));
});
test("reinforced memory with same initial strength and age ranks above unreinforced memory", () => {
@@ -530,6 +596,7 @@ test("dedupe reinforcement does not increment for same session", () => {
assert.ok(retained, "existing manual memory should be retained");
assert.equal(retained.reinforcementCount, 1);
assert.equal(retained.lastReinforcedSessionID, "same-session");
assert.equal(result.evidence.some(event => event.type === "memory_reinforced"), false);
});
test("dedupe reinforcement does not increment under one hour", () => {
@@ -552,6 +619,23 @@ test("dedupe reinforcement does not increment under one hour", () => {
assert.ok(retained, "existing manual memory should be retained");
assert.equal(retained.reinforcementCount, 1);
assert.equal(retained.lastReinforcedSessionID, "old-session");
assert.equal(result.evidence.some(event => event.type === "memory_reinforced"), false);
});
// Dedupes a manual entry that already has reinforcementCount 6 against a
// differently-cased duplicate from another session and checks that no
// "memory_reinforced" evidence is emitted.
test("dedupe reinforcement does not emit evidence at max reinforcement count", () => {
  const existing: LongTermMemoryEntry = {
    ...entry("existing-max", "Prefer deterministic consolidation accounting", "feedback"),
    source: "manual",
    reinforcementCount: 6,
  };
  const duplicate: LongTermMemoryEntry = {
    ...entry("duplicate-max", "prefer deterministic consolidation accounting!!!", "feedback"),
    pendingOwnerSessionID: "new-session",
  };

  const { evidence } = dedupeLongTermEntriesWithAccounting([existing, duplicate]);
  const reinforcedEmitted = evidence.some(item => item.type === "memory_reinforced");

  assert.equal(reinforcedEmitted, false);
});
test("enforceLongTermLimits orders entries by retention strength", () => {