From 8e07bfe3c1181fceca4d2ac8549c62a20d5ef927 Mon Sep 17 00:00:00 2001 From: Ralph Chang Date: Tue, 28 Apr 2026 14:29:28 +0800 Subject: [PATCH] fix(memory): address quality cleanup audit findings --- .gitignore | 3 + CHANGELOG.md | 18 +++-- RELEASE_NOTES.md | 66 ++++++++++++----- scripts/dev/dry-run-migration.ts | 47 ++++++++++-- src/extractors.ts | 11 ++- src/workspace-memory.ts | 15 +++- tests/extractors.test.ts | 29 ++++++++ tests/fixtures/real-workspaces-snapshot.ts | 58 +++++++-------- tests/workspace-memory.test.ts | 83 ++++++++++++++++++++++ 9 files changed, 267 insertions(+), 63 deletions(-) diff --git a/.gitignore b/.gitignore index d0becba..6ebddb7 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,6 @@ pnpm-lock.yaml # Superpowers local planning artifacts docs/superpowers/plans/ + +# Local migration dry-run roots +scripts/dev/dry-run-roots.local.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 66db849..9559f72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,14 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.4.0] - 2026-04-28 -### Memory Quality Cleanup +### Added -- Unified quality gate for compaction memory candidates and cleanup checks. +- Local migration audit log for the `2026-04-28-quality-cleanup` migration: + `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`. +- Local extraction rejection log for rejected compaction memory candidates: + `~/.local/share/opencode-working-memory/extraction-rejections.jsonl`. +- Sanitized real-workspace regression fixtures for memory cleanup migration behavior. + +### Changed + +- Unified memory quality rules in a shared quality gate for compaction memory candidates and cleanup checks. - Rewritten compaction memory prompt to reduce over-production of low-quality memories. 
-- Conservative one-time quality cleanup migration (`2026-04-28-quality-cleanup`) that supersedes only high-confidence garbage patterns: progress snapshots, raw errors, commit/CI snapshots, temporary status notes, active file snapshots, code/API signatures, path-heavy entries, and empty entries. -- Soft heuristic failures (`bad_feedback`, `bad_decision`) are intentionally excluded from automatic migration cleanup to protect durable declarative memories such as branding rules, API facts, release rules, and architecture decisions. -- Migration audit log: `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl`. -- Extraction rejection log: `~/.local/share/opencode-working-memory/extraction-rejections.jsonl`. +- Changed quality cleanup migration to be conservative: it supersedes only high-confidence garbage patterns, including progress snapshots, raw errors, commit/CI snapshots, temporary status notes, active file snapshots, code/API signatures, path-heavy entries, and empty entries. +- Soft heuristic failures (`bad_feedback`, `bad_decision`) are intentionally excluded from automatic migration cleanup to protect durable declarative memories such as branding rules, API facts, release rules, user workflow preferences, and architecture decisions. ### Recovery note diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index de0435a..d3ad70f 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -4,37 +4,69 @@ ### Memory Quality Cleanup -This minor release automatically improves memory quality for all existing users on upgrade. Low-quality compaction memories are identified and superseded without requiring manual cleanup. +This release improves automatic workspace memory quality without risking broad cleanup of useful existing memories. 
+ +The quality gate is now shared across compaction extraction and migration checks, the compaction prompt is stricter about what should become durable memory, and the one-time migration is intentionally conservative. ### What Changed -- **Unified quality gate**: All memory types (feedback, decision, project, reference) now share the same quality rules instead of only project entries having a quality check. -- **Hardened compaction prompt**: The model is explicitly instructed that most compactions should produce zero memories, with clear good/bad examples. -- **Auto-supersede migration**: On first load after upgrade, existing low-quality `compaction` memories are automatically marked as `superseded` with quality tags. Explicit and manual memories are never affected. +- **Unified quality rules**: memory quality checks now live in one shared module and apply consistently across feedback, decisions, project facts, and references. +- **Stricter compaction output**: the compaction prompt now tells the model to save fewer memories and prefer durable facts, user preferences, architecture decisions, and hard-to-rediscover references. +- **Conservative migration cleanup**: the `2026-04-28-quality-cleanup` migration only supersedes high-confidence garbage patterns, not every rejected memory. +- **Audit logs**: automatic migration cleanup writes local JSONL audit records so superseded entries can be inspected and restored. +- **Extraction rejection logs**: newly rejected compaction candidates are logged locally to help calibrate future quality rules. +- **Regression coverage**: migration behavior is tested against sanitized real-workspace patterns to prevent mass false positives from coming back. 
### What Gets Cleaned Up -Low-quality memory patterns that are now rejected/superseded: +The migration may supersede existing `source: "compaction"` memories only when they match hard garbage patterns: -- Progress snapshots: "Wave 1 completed successfully", "180 tests passed" -- Session-internal notes: "The assistant reviewed feedback and updated the plan" -- Implementation notes: "Implemented X in plugin.ts" -- Commit/CI references: "Commit a762e86 contains the fix" +- Empty entries +- Progress snapshots, such as "Wave 1 completed successfully" +- Test or suite count snapshots, such as "180 tests passed" - Raw errors and stack traces -- Temporary status: "Currently running npm test" +- Commit or CI snapshots +- Temporary status notes, such as "Currently running npm test" +- Active file snapshots +- Code or API signatures +- Path-heavy entries that are just rediscoverable file lists + +### What Is Protected + +The migration does not supersede entries whose only issue is a soft heuristic failure, such as: + +- `bad_feedback` +- `bad_decision` + +This protects useful declarative memories like: + +- Product branding rules +- API facts +- Release rules +- Architecture decisions +- User workflow preferences + +Explicit and manual memories are also protected. ### Migration Behavior -- Runs exactly once per workspace (idempotent, non-destructive) -- Only affects `source: "compaction"` entries -- Explicit/manual memories are protected -- Superseded entries retain `status: "superseded"` and quality tags for audit -- No user action required +- Runs once per workspace; if the audit log cannot be written, the cleanup is rolled back and not recorded as applied. +- Only affects active `source: "compaction"` entries. +- Marks matching entries as `status: "superseded"` instead of deleting them. +- Adds `quality_cleanup` and `quality:`-prefixed tags to superseded entries. 
+- Writes audit logs to: + `~/.local/share/opencode-working-memory/migration-logs/2026-04-28-quality-cleanup.jsonl` +- Writes extraction rejection logs to: + `~/.local/share/opencode-working-memory/extraction-rejections.jsonl` + +### Recovery + +If a useful memory is superseded, inspect the migration audit log and restore the entry by changing its status back to `"active"` in the workspace's `workspace-memory.json`. ### Upgrade Notes - No configuration changes required. -- Existing workspace memory files are automatically cleaned on first load. +- Existing workspace memory files remain compatible. - The OpenCode config entry stays the same: ```json @@ -45,7 +77,7 @@ Low-quality memory patterns that are now rejected/superseded: ### Validation -- `npm test` (196 tests) +- `npm test` - `npm run typecheck` --- diff --git a/scripts/dev/dry-run-migration.ts b/scripts/dev/dry-run-migration.ts index 807411d..76a2e4f 100644 --- a/scripts/dev/dry-run-migration.ts +++ b/scripts/dev/dry-run-migration.ts @@ -1,12 +1,45 @@ +/** + * Local helper to trigger migration on workspace roots. 
+ * + * Usage: + * MIGRATION_DRY_RUN_ROOTS=/path/a:/path/b bun run scripts/dev/dry-run-migration.ts + * + * Or create a local file (gitignored): + * echo "/path/to/workspace1" > scripts/dev/dry-run-roots.local.txt + * echo "/path/to/workspace2" >> scripts/dev/dry-run-roots.local.txt + * bun run scripts/dev/dry-run-migration.ts + */ + +import { existsSync } from "node:fs"; +import { readFile } from "node:fs/promises"; +import { join } from "node:path"; import { loadWorkspaceMemory } from "../../src/workspace-memory.ts"; -const roots = [ - "/Users/sd_wo/work/opencode-working-memory", - "/Users/sd_wo/Documents/projects/Pre-cancer-atlas", - "/Users/sd_wo/work/opencode-record", - "/Users/sd_wo/work/pathology-agent-reports", - "/Users/sd_wo/work/pathology-extraction", -]; +async function getRoots(): Promise<string[]> { + // Priority 1: environment variable + const envRoots = process.env.MIGRATION_DRY_RUN_ROOTS; + if (envRoots) { + return envRoots.split(":").filter(root => root.length > 0); + } + + // Priority 2: local file + const localFile = join(import.meta.dirname, "dry-run-roots.local.txt"); + if (existsSync(localFile)) { + const content = await readFile(localFile, "utf8"); + return content.trim().split("\n").filter(root => root.length > 0); + } + + // No roots configured + console.log("No workspace roots configured."); + console.log("Set MIGRATION_DRY_RUN_ROOTS=/path/a:/path/b or create dry-run-roots.local.txt"); + return []; +} + +const roots = await getRoots(); + +if (roots.length === 0) { + process.exit(0); +} for (const root of roots) { console.log(`Loading workspace memory: ${root}`); diff --git a/src/extractors.ts b/src/extractors.ts index 1fd4514..39feaa4 100644 --- a/src/extractors.ts +++ b/src/extractors.ts @@ -248,6 +248,15 @@ async function logExtractionRejection(entry: ExtractionRejectionLogEntry): Promi } } +function redactSensitiveText(text: string): string { + return text + .replace(/bearer\s+[a-zA-Z0-9._-]+/gi, "bearer [REDACTED]") 
.replace(/token[=:]\s*[a-zA-Z0-9._-]+/gi, "token=[REDACTED]") + .replace(/password[=:]\s*[a-zA-Z0-9._-]+/gi, "password=[REDACTED]") + .replace(/secret[=:]\s*[a-zA-Z0-9._-]+/gi, "secret=[REDACTED]") + .replace(/api[-_]?key[=:]\s*[a-zA-Z0-9._-]+/gi, "api_key=[REDACTED]"); +} + function shouldAcceptWorkspaceMemoryCandidate( entry: { type: LongTermType; @@ -278,7 +287,7 @@ function shouldAcceptWorkspaceMemoryCandidate( void logExtractionRejection({ timestamp: new Date().toISOString(), type: entry.type, - text, + text: redactSensitiveText(text), reasons: quality.reasons, source: "compaction", }); diff --git a/src/workspace-memory.ts b/src/workspace-memory.ts index 15a978e..78083bd 100644 --- a/src/workspace-memory.ts +++ b/src/workspace-memory.ts @@ -208,14 +208,23 @@ export async function normalizeWorkspaceMemoryWithAccounting( // One-time migrations for legacy/low-quality snapshot violations. // Run quality cleanup first so hard violations receive quality audit tags // before the older P0 project-only cleanup marks progress snapshots. + const beforeQualityCleanup = result; const qualityCleanup = runMigrationQualityCleanup(result, nowIso); result = qualityCleanup.store; + let skipRemainingMigrations = false; if (qualityCleanup.events.length > 0) { - await appendQualityCleanupMigrationLog(qualityCleanup.events).catch(error => { + try { + await appendQualityCleanupMigrationLog(qualityCleanup.events); + } catch (error) { console.error("[memory] failed to write quality cleanup migration log:", error); - }); + console.error("[memory] aborting migration to maintain audit trail integrity"); + result = beforeQualityCleanup; + skipRemainingMigrations = true; + } + } + if (!skipRemainingMigrations) { + result = runMigrationP0Cleanup(result, nowIso); } - result = runMigrationP0Cleanup(result, nowIso); // P0 accounting only considers active entries. 
Entries that were already // superseded before this normalization are preserved in storage; entries that diff --git a/tests/extractors.test.ts b/tests/extractors.test.ts index 6c53563..1818d67 100644 --- a/tests/extractors.test.ts +++ b/tests/extractors.test.ts @@ -324,6 +324,35 @@ Memory candidates: } }); +test("parseWorkspaceMemoryCandidates redacts secrets in extraction rejection log", async () => { + const dataHome = await mkdtemp(join(tmpdir(), "wm-extraction-redact-data-")); + const previousXdgDataHome = process.env.XDG_DATA_HOME; + process.env.XDG_DATA_HOME = dataHome; + + try { + const summary = ` +Memory candidates: +- reference TypeError: bearer sk_test token=tok123 password=pass123 secret=sec123 api_key=key123 +`; + + const items = parseWorkspaceMemoryCandidates(summary); + + assert.equal(items.length, 0); + const logPath = join(dataHome, "opencode-working-memory", "extraction-rejections.jsonl"); + const lines = (await waitForFile(logPath)).trim().split("\n"); + assert.equal(lines.length, 1); + const event = JSON.parse(lines[0]); + assert.equal( + event.text, + "TypeError: bearer [REDACTED] token=[REDACTED] password=[REDACTED] secret=[REDACTED] api_key=[REDACTED]", + ); + } finally { + if (previousXdgDataHome === undefined) delete process.env.XDG_DATA_HOME; + else process.env.XDG_DATA_HOME = previousXdgDataHome; + await rm(dataHome, { recursive: true, force: true }); + } +}); + test("parseWorkspaceMemoryCandidates rejects exact file count snapshots", () => { const summary = ` Memory candidates: diff --git a/tests/fixtures/real-workspaces-snapshot.ts b/tests/fixtures/real-workspaces-snapshot.ts index 1ea0134..0bdce94 100644 --- a/tests/fixtures/real-workspaces-snapshot.ts +++ b/tests/fixtures/real-workspaces-snapshot.ts @@ -30,38 +30,38 @@ function mem( } export const REAL_WORKSPACE_FIXTURES: Record = { - "medical-atlas": [ - mem("ma_ui_rule", "feedback", "UI 要統一風格:兩個表格都要 scrollable,約 20 rows", "active", "durable UI rule without user preference keyword"), 
- mem("ma_csp_rule", "feedback", "架構師建議中期將 CSP 改為 nonce/hash,而非 'unsafe-inline'", "active", "durable architecture recommendation"), - mem("ma_form_rule", "decision", "Form 添加防御性 action/method 屬性,避免 JS 失效時 GET 首頁", "active", "declarative design rule"), - mem("ma_logging_rule", "decision", "Cloud Logging filter 需支援多種 log 格式(jsonPayload.event_type, jsonPayload.message, textPayload)", "active", "durable spec using 需支援"), + "workspace-alpha": [ + mem("alpha_ui_rule", "feedback", "UI should have consistent style: both tables scrollable, about 20 rows", "active", "durable UI rule without user preference keyword"), + mem("alpha_csp_rule", "feedback", "Architecture recommendation: migrate the content security policy to nonce or hash rules rather than unsafe inline scripts", "active", "durable architecture recommendation"), + mem("alpha_form_rule", "decision", "Form uses defensive action and method attributes so the fallback does not navigate to the home page when scripts fail", "active", "declarative design rule"), + mem("alpha_logging_rule", "decision", "Cloud logging filter supports multiple log formats: structured event type, structured message, and text payload", "active", "durable declarative logging spec"), ], - "opencode-record": [ - mem("or_phase_snapshot", "project", "後端健康改進計劃已完成 Phase 1-4", "superseded", "progress snapshot"), - mem("or_test_snapshot", "project", "測試套件:1237 tests pass, 226 suites", "superseded", "test count snapshot"), - mem("or_sync_snapshot", "project", "USB 同步:37 個文件(bundles, server, frontend, tests, docs)", "superseded", "file sync snapshot"), + "workspace-beta": [ + mem("beta_phase_snapshot", "project", "Backend health improvement plan completed Phase 1-4", "superseded", "progress snapshot"), + mem("beta_test_snapshot", "project", "Test suite: 1237 tests pass, 226 suites", "superseded", "test count snapshot"), + mem("beta_sync_snapshot", "project", "External drive synced 37 files including bundles, service, frontend, tests, and docs", 
"superseded", "file sync snapshot"), ], - "agent-reports": [ - mem("ar_plan_decision", "feedback", "架構師建議執行 P3 前先確認有實際需求", "active", "durable plan decision"), - mem("ar_reviewer_fallback", "feedback", "`comprehensive-code-reviewer` subagent unreliable; use `phase-verifier` as fallback", "active", "durable workaround rule"), - mem("ar_wave_rule", "feedback", "每個 Wave 結束要找 verifier 確認,全部結束找 code review", "active", "durable workflow rule"), - mem("ar_remote_headers", "decision", "Remote headers 透過 `requestInit: { headers }` 傳入 `StreamableHTTPClientTransport`", "active", "declarative API rule"), - mem("ar_signal_order", "decision", "Graceful process cleanup signal order: SIGINT (300ms) → SIGTERM (700ms) → SIGKILL", "active", "durable process cleanup spec"), - mem("ar_ownership", "decision", "`McpRuntimeState` ownership model: CLI owns both runtime and mcpRuntime, dispose order is runtime first", "active", "durable ownership model"), - mem("ar_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport/session failures", "active", "durable retry policy"), + "workspace-gamma": [ + mem("gamma_need_check", "feedback", "Architecture recommendation: confirm actual demand before executing the later priority phase", "active", "durable plan decision"), + mem("gamma_review_fallback", "feedback", "Primary review automation can be unreliable; use phase verification as the fallback", "active", "durable workaround rule"), + mem("gamma_wave_rule", "feedback", "Each wave should end with verifier confirmation, and the full implementation should end with code review", "active", "durable workflow rule"), + mem("gamma_remote_headers", "decision", "Remote headers are passed through the HTTP transport request initialization headers option", "active", "declarative API rule"), + mem("gamma_signal_order", "decision", "Graceful process cleanup signal order: interrupt for 300ms, terminate for 700ms, then kill", "active", "durable process cleanup spec"), + 
mem("gamma_ownership", "decision", "Runtime state ownership model: the command-line entrypoint owns both runtime objects, and disposal order is primary runtime first", "active", "durable ownership model"), + mem("gamma_retry_policy", "decision", "Recovery retry policy: only once per tool call, only for transport or session failures", "active", "durable retry policy"), ], - "pdf-extraction": [ - mem("pe_user_cycle", "feedback", "User 要求完整的 plan-review-feedback-modify-verify 循環,不是直接執行", "active", "mixed-language user workflow preference"), - mem("pe_ollama_batch", "feedback", "Ollama 大批量嵌入需要控制批次大小(20-50)和請求間隔", "active", "durable operational knowledge"), - mem("pe_option_b", "decision", "Phase 2 Fix 採用 Option B:multi-profile search grouping", "active", "design decision using 採用"), - mem("pe_single_source", "decision", "MCP source 維持單一 `book`,書籍身份在 source ID", "active", "design constraint using 維持"), - mem("pe_endpoint", "decision", "Ollama endpoint is `/api/embed` (not `/api/embeddings`) with `\"input\"` field", "active", "declarative API fact"), - mem("pe_filter_pipeline", "decision", "Filter pipeline: pre-chunk filtering (not post-chunk) to prevent embedding contamination", "active", "durable architecture rule"), - mem("pe_do_not_delete", "decision", "不刪除孤立的 reference-like 行(正文中的 \"et al.\" 等是合法引用)", "active", "do-not rule not matching current 不要 pattern"), + "workspace-delta": [ + mem("delta_user_cycle", "feedback", "User requires a complete plan, review, feedback, modify, and verify loop rather than direct execution", "active", "user workflow preference"), + mem("delta_batching", "feedback", "Large-batch embedding requires controlled batch size around 20 to 50 items and a delay between requests", "active", "durable operational knowledge"), + mem("delta_option_b", "decision", "Phase 2 fix adopted Option B: grouped search across multiple profiles", "active", "design decision using adopted"), + mem("delta_single_source", "decision", "MCP source keeps a single 
generic source type, with item identity encoded in the source ID", "active", "design constraint using keeps"), + mem("delta_endpoint", "decision", "Embedding service endpoint is `/api/embed` rather than `/api/embeddings`, with the input field in the request body", "active", "declarative API fact"), + mem("delta_filter_pipeline", "decision", "Filter pipeline uses pre-chunk filtering rather than post-chunk filtering to prevent embedding contamination", "active", "durable architecture rule"), + mem("delta_do_not_delete", "decision", "Do not delete isolated reference-like lines because citation fragments in body text can be valid references", "active", "do-not rule"), ], - "self-repo": [ - mem("sr_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"), - mem("sr_branding", "decision", "Product branding is \"OpenCode Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"), - mem("sr_changelog", "decision", "CHANGELOG version scope follows git tags: changes from v1.2.3 tag through HEAD belong to next version", "active", "durable release rule"), + "workspace-epsilon": [ + mem("epsilon_author_credit", "feedback", "User insists on preserving external contributor author credit and uses merge workflow", "active", "durable preference using insists"), + mem("epsilon_branding", "decision", "Product branding is \"Generic Working Memory\" without \"Plugin\" in the name", "active", "durable branding rule"), + mem("epsilon_changelog", "decision", "Changelog version scope follows release tags: changes from the previous version tag through the current branch belong to the next version", "active", "durable release rule"), ], }; diff --git a/tests/workspace-memory.test.ts b/tests/workspace-memory.test.ts index 91f92f9..ed42ca5 100644 --- a/tests/workspace-memory.test.ts +++ b/tests/workspace-memory.test.ts @@ -1080,6 +1080,89 @@ test("quality cleanup 
migration writes audit log for hard supersedes", async () } }); +test("quality cleanup migration aborts supersede when audit log cannot be written", async () => { + const sandbox = await mkdtemp(join(tmpdir(), "wm-quality-audit-fail-")); + const dataHome = join(sandbox, "xdg-data-home"); + const root = join(sandbox, "workspace"); + const previousXdgDataHome = process.env.XDG_DATA_HOME; + const previousConsoleError = console.error; + process.env.XDG_DATA_HOME = dataHome; + console.error = () => {}; + + try { + await mkdir(root, { recursive: true }); + const now = "2026-04-28T00:00:00.000Z"; + const storePath = await workspaceMemoryPath(root); + await mkdir(dirname(storePath), { recursive: true }); + await writeFile(storePath, JSON.stringify({ + version: 1, + workspace: { root, key: await workspaceKey(root) }, + limits: { maxRenderedChars: LONG_TERM_LIMITS.maxRenderedChars, maxEntries: LONG_TERM_LIMITS.maxEntries }, + entries: [{ + id: "hard_progress", + type: "project", + text: "Test suite: 1237 tests pass, 226 suites", + source: "compaction", + confidence: 0.75, + status: "active", + createdAt: now, + updatedAt: now, + staleAfterDays: 60, + }], + migrations: [], + updatedAt: now, + }, null, 2), "utf8"); + + const blockedLogDir = join(dataHome, "opencode-working-memory", "migration-logs"); + await writeFile(blockedLogDir, "not a directory", "utf8"); + + const loaded = await loadWorkspaceMemory(root); + const persisted = JSON.parse(await readFile(storePath, "utf8")) as WorkspaceMemoryStore; + + assert.equal(loaded.entries.find(entry => entry.id === "hard_progress")?.status, "active"); + assert.equal(persisted.entries.find(entry => entry.id === "hard_progress")?.status, "active"); + assert.equal(loaded.migrations?.includes("2026-04-28-quality-cleanup"), false); + assert.equal(persisted.migrations?.includes("2026-04-28-quality-cleanup"), false); + } finally { + console.error = previousConsoleError; + if (previousXdgDataHome === undefined) delete 
process.env.XDG_DATA_HOME; + else process.env.XDG_DATA_HOME = previousXdgDataHome; + await rm(sandbox, { recursive: true, force: true }); + } +}); + +test("real workspace regression fixture is de-identified and English-only", () => { + const cjkText = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u; + const identifyingTerms = [ + "medical-atlas", + "opencode-record", + "agent-reports", + "pdf-extraction", + "self-repo", + "OpenCode Working Memory", + ]; + const failures: string[] = []; + + for (const [workspaceName, fixtureEntries] of Object.entries(REAL_WORKSPACE_FIXTURES)) { + if (identifyingTerms.some(term => workspaceName.includes(term))) { + failures.push(`${workspaceName}: workspace key should be generalized`); + } + + for (const entry of fixtureEntries) { + if (cjkText.test(entry.text)) { + failures.push(`${workspaceName}/${entry.id}: text must be English-only`); + } + for (const term of identifyingTerms) { + if (entry.text.includes(term)) { + failures.push(`${workspaceName}/${entry.id}: text contains identifying term ${term}`); + } + } + } + } + + assert.equal(failures.length, 0, `Fixture privacy failures:\n${failures.join("\n")}`); +}); + test("quality cleanup migration regression against real workspace samples", async () => { const failures: string[] = []; const now = "2026-04-28T00:00:00.000Z";